diff options
author | Junio C Hamano <gitster@pobox.com> | 2021-09-20 15:20:39 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-09-20 15:20:39 -0700 |
commit | 0649303820cf88fb5a6ab440af15c8d6b8799d3f (patch) | |
tree | 1692a394a689bd99234ae8849c0e9e01e16fa8b1 | |
parent | Merge branch 'ps/fetch-optim' (diff) | |
parent | pack-bitmap: drop bitmap_index argument from try_partial_reuse() (diff) | |
download | tgif-0649303820cf88fb5a6ab440af15c8d6b8799d3f.tar.xz |
Merge branch 'tb/multi-pack-bitmaps'
The reachability bitmap file used to be generated only for a single
pack, but now we've learned to generate bitmaps for history that
spans multiple packfiles.
* tb/multi-pack-bitmaps: (29 commits)
pack-bitmap: drop bitmap_index argument from try_partial_reuse()
pack-bitmap: drop repository argument from prepare_midx_bitmap_git()
p5326: perf tests for MIDX bitmaps
p5310: extract full and partial bitmap tests
midx: respect 'GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP'
t7700: update to work with MIDX bitmap test knob
t5319: don't write MIDX bitmaps in t5319
t5310: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP
t0410: disable GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP
t5326: test multi-pack bitmap behavior
t/helper/test-read-midx.c: add --checksum mode
t5310: move some tests to lib-bitmap.sh
pack-bitmap: write multi-pack bitmaps
pack-bitmap: read multi-pack bitmaps
pack-bitmap.c: avoid redundant calls to try_partial_reuse
pack-bitmap.c: introduce 'bitmap_is_preferred_refname()'
pack-bitmap.c: introduce 'nth_bitmap_object_oid()'
pack-bitmap.c: introduce 'bitmap_num_objects()'
midx: avoid opening multiple MIDXs when writing
midx: close linked MIDXs, avoid leaking memory
...
-rw-r--r-- | Documentation/git-multi-pack-index.txt | 20 | ||||
-rw-r--r-- | Documentation/technical/bitmap-format.txt | 71 | ||||
-rw-r--r-- | Documentation/technical/multi-pack-index.txt | 10 | ||||
-rw-r--r-- | builtin/commit-graph.c | 22 | ||||
-rw-r--r-- | builtin/multi-pack-index.c | 2 | ||||
-rw-r--r-- | builtin/pack-objects.c | 8 | ||||
-rw-r--r-- | builtin/repack.c | 12 | ||||
-rwxr-xr-x | ci/run-build-and-tests.sh | 1 | ||||
-rw-r--r-- | git.c | 2 | ||||
-rw-r--r-- | midx.c | 331 | ||||
-rw-r--r-- | midx.h | 5 | ||||
-rw-r--r-- | object-file.c | 21 | ||||
-rw-r--r-- | object-store.h | 1 | ||||
-rw-r--r-- | pack-bitmap-write.c | 79 | ||||
-rw-r--r-- | pack-bitmap.c | 497 | ||||
-rw-r--r-- | pack-bitmap.h | 8 | ||||
-rw-r--r-- | packfile.c | 2 | ||||
-rw-r--r-- | t/README | 4 | ||||
-rw-r--r-- | t/helper/test-read-midx.c | 16 | ||||
-rw-r--r-- | t/lib-bitmap.sh | 240 | ||||
-rw-r--r-- | t/perf/lib-bitmap.sh | 69 | ||||
-rwxr-xr-x | t/perf/p5310-pack-bitmaps.sh | 65 | ||||
-rwxr-xr-x | t/perf/p5326-multi-pack-bitmaps.sh | 43 | ||||
-rwxr-xr-x | t/t0410-partial-clone.sh | 12 | ||||
-rwxr-xr-x | t/t5310-pack-bitmaps.sh | 233 | ||||
-rwxr-xr-x | t/t5319-multi-pack-index.sh | 63 | ||||
-rwxr-xr-x | t/t5326-multi-pack-bitmaps.sh | 286 | ||||
-rwxr-xr-x | t/t7700-repack.sh | 18 |
28 files changed, 1676 insertions, 465 deletions
diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt index ffd601bc17..a9df3dbd32 100644 --- a/Documentation/git-multi-pack-index.txt +++ b/Documentation/git-multi-pack-index.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git multi-pack-index' [--object-dir=<dir>] [--[no-]progress] - [--preferred-pack=<pack>] <subcommand> + [--preferred-pack=<pack>] [--[no-]bitmap] <subcommand> DESCRIPTION ----------- @@ -23,6 +23,8 @@ OPTIONS Use given directory for the location of Git objects. We check `<dir>/packs/multi-pack-index` for the current MIDX file, and `<dir>/packs` for the pack-files to index. ++ +`<dir>` must be an alternate of the current repository. --[no-]progress:: Turn progress on/off explicitly. If neither is specified, progress is @@ -37,9 +39,12 @@ write:: -- --preferred-pack=<pack>:: Optionally specify the tie-breaking pack used when - multiple packs contain the same object. If not given, - ties are broken in favor of the pack with the lowest - mtime. + multiple packs contain the same object. `<pack>` must + contain at least one object. If not given, ties are + broken in favor of the pack with the lowest mtime. + + --[no-]bitmap:: + Control whether or not a multi-pack bitmap is written. -- verify:: @@ -81,6 +86,13 @@ EXAMPLES $ git multi-pack-index write ----------------------------------------------- +* Write a MIDX file for the packfiles in the current .git folder with a +corresponding bitmap. ++ +------------------------------------------------------------- +$ git multi-pack-index write --preferred-pack=<pack> --bitmap +------------------------------------------------------------- + * Write a MIDX file for the packfiles in an alternate object store. 
+ ----------------------------------------------- diff --git a/Documentation/technical/bitmap-format.txt b/Documentation/technical/bitmap-format.txt index f8c18a0f7a..04b3ec2178 100644 --- a/Documentation/technical/bitmap-format.txt +++ b/Documentation/technical/bitmap-format.txt @@ -1,6 +1,44 @@ GIT bitmap v1 format ==================== +== Pack and multi-pack bitmaps + +Bitmaps store reachability information about the set of objects in a packfile, +or a multi-pack index (MIDX). The former is defined obviously, and the latter is +defined as the union of objects in packs contained in the MIDX. + +A bitmap may belong to either one pack, or the repository's multi-pack index (if +it exists). A repository may have at most one bitmap. + +An object is uniquely described by its bit position within a bitmap: + + - If the bitmap belongs to a packfile, the __n__th bit corresponds to + the __n__th object in pack order. For a function `offset` which maps + objects to their byte offset within a pack, pack order is defined as + follows: + + o1 <= o2 <==> offset(o1) <= offset(o2) + + - If the bitmap belongs to a MIDX, the __n__th bit corresponds to the + __n__th object in MIDX order. With an additional function `pack` which + maps objects to the pack they were selected from by the MIDX, MIDX order + is defined as follows: + + o1 <= o2 <==> pack(o1) <= pack(o2) /\ offset(o1) <= offset(o2) + + The ordering between packs is done according to the MIDX's .rev file. + Notably, the preferred pack sorts ahead of all other packs. + +The on-disk representation (described below) of a bitmap is the same regardless +of whether or not that bitmap belongs to a packfile or a MIDX. The only +difference is the interpretation of the bits, which is described above. + +Certain bitmap extensions are supported (see: Appendix B). No extensions are +required for bitmaps corresponding to packfiles. For bitmaps that correspond to +MIDXs, both the bit-cache and rev-cache extensions are required. 
+ +== On-disk format + - A header appears at the beginning: 4-byte signature: {'B', 'I', 'T', 'M'} @@ -14,17 +52,19 @@ GIT bitmap v1 format The following flags are supported: - BITMAP_OPT_FULL_DAG (0x1) REQUIRED - This flag must always be present. It implies that the bitmap - index has been generated for a packfile with full closure - (i.e. where every single object in the packfile can find - its parent links inside the same packfile). This is a - requirement for the bitmap index format, also present in JGit, - that greatly reduces the complexity of the implementation. + This flag must always be present. It implies that the + bitmap index has been generated for a packfile or + multi-pack index (MIDX) with full closure (i.e. where + every single object in the packfile/MIDX can find its + parent links inside the same packfile/MIDX). This is a + requirement for the bitmap index format, also present in + JGit, that greatly reduces the complexity of the + implementation. - BITMAP_OPT_HASH_CACHE (0x4) If present, the end of the bitmap file contains `N` 32-bit name-hash values, one per object in the - pack. The format and meaning of the name-hash is + pack/MIDX. The format and meaning of the name-hash is described below. 4-byte entry count (network byte order) @@ -33,7 +73,8 @@ GIT bitmap v1 format 20-byte checksum - The SHA1 checksum of the pack this bitmap index belongs to. + The SHA1 checksum of the pack/MIDX this bitmap index + belongs to. - 4 EWAH bitmaps that act as type indexes @@ -50,7 +91,7 @@ GIT bitmap v1 format - Tags In each bitmap, the `n`th bit is set to true if the `n`th object - in the packfile is of that type. + in the packfile or multi-pack index is of that type. 
The obvious consequence is that the OR of all 4 bitmaps will result in a full set (all bits set), and the AND of all 4 bitmaps will @@ -62,8 +103,9 @@ GIT bitmap v1 format Each entry contains the following: - 4-byte object position (network byte order) - The position **in the index for the packfile** where the - bitmap for this commit is found. + The position **in the index for the packfile or + multi-pack index** where the bitmap for this commit is + found. - 1-byte XOR-offset The xor offset used to compress this bitmap. For an entry @@ -146,10 +188,11 @@ Name-hash cache --------------- If the BITMAP_OPT_HASH_CACHE flag is set, the end of the bitmap contains -a cache of 32-bit values, one per object in the pack. The value at +a cache of 32-bit values, one per object in the pack/MIDX. The value at position `i` is the hash of the pathname at which the `i`th object -(counting in index order) in the pack can be found. This can be fed -into the delta heuristics to compare objects with similar pathnames. +(counting in index or multi-pack index order) in the pack/MIDX can be found. +This can be fed into the delta heuristics to compare objects with similar +pathnames. The hash algorithm used is: diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt index fb688976c4..1a73c3ee20 100644 --- a/Documentation/technical/multi-pack-index.txt +++ b/Documentation/technical/multi-pack-index.txt @@ -71,14 +71,10 @@ Future Work still reducing the number of binary searches required for object lookups. -- The reachability bitmap is currently paired directly with a single - packfile, using the pack-order as the object order to hopefully - compress the bitmaps well using run-length encoding. This could be - extended to pair a reachability bitmap with a multi-pack-index. 
If - the multi-pack-index is extended to store a "stable object order" +- If the multi-pack-index is extended to store a "stable object order" (a function Order(hash) = integer that is constant for a given hash, - even as the multi-pack-index is updated) then a reachability bitmap - could point to a multi-pack-index and be updated independently. + even as the multi-pack-index is updated) then MIDX bitmaps could be + updated independently of the MIDX. - Packfiles can be marked as "special" using empty files that share the initial name but replace ".pack" with ".keep" or ".promisor". diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 21fc6e934b..0386f5c775 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -60,28 +60,6 @@ static struct option *add_common_options(struct option *to) return parse_options_concat(common_opts, to); } -static struct object_directory *find_odb(struct repository *r, - const char *obj_dir) -{ - struct object_directory *odb; - char *obj_dir_real = real_pathdup(obj_dir, 1); - struct strbuf odb_path_real = STRBUF_INIT; - - prepare_alt_odb(r); - for (odb = r->objects->odb; odb; odb = odb->next) { - strbuf_realpath(&odb_path_real, odb->path, 1); - if (!strcmp(obj_dir_real, odb_path_real.buf)) - break; - } - - free(obj_dir_real); - strbuf_release(&odb_path_real); - - if (!odb) - die(_("could not find object directory matching %s"), obj_dir); - return odb; -} - static int graph_verify(int argc, const char **argv) { struct commit_graph *graph = NULL; diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c index 649aa5f9ab..66de6efd41 100644 --- a/builtin/multi-pack-index.c +++ b/builtin/multi-pack-index.c @@ -68,6 +68,8 @@ static int cmd_multi_pack_index_write(int argc, const char **argv) OPT_STRING(0, "preferred-pack", &opts.preferred_pack, N_("preferred-pack"), N_("pack for reuse when computing a multi-pack bitmap")), + OPT_BIT(0, "bitmap", &opts.flags, N_("write multi-pack bitmap"), + MIDX_WRITE_BITMAP 
| MIDX_WRITE_REV_INDEX), OPT_END(), }; diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index ec8503563a..e27f7cdb91 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1124,6 +1124,11 @@ static void write_reused_pack(struct hashfile *f) break; offset += ewah_bit_ctz64(word >> offset); + /* + * Can use bit positions directly, even for MIDX + * bitmaps. See comment in try_partial_reuse() + * for why. + */ write_reused_pack_one(pos + offset, f, &w_curs); display_progress(progress_state, ++written); } @@ -1256,7 +1261,8 @@ static void write_pack_file(void) bitmap_writer_show_progress(progress); bitmap_writer_select_commits(indexed_commits, indexed_commits_nr, -1); - bitmap_writer_build(&to_pack); + if (bitmap_writer_build(&to_pack) < 0) + die(_("failed to write bitmap index")); bitmap_writer_finish(written_list, nr_written, tmpname.buf, write_bitmap_options); write_bitmap_index = 0; diff --git a/builtin/repack.c b/builtin/repack.c index 5f9bc74adc..82ab668272 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -515,6 +515,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix) if (!(pack_everything & ALL_INTO_ONE) || !is_bare_repository()) write_bitmaps = 0; + } else if (write_bitmaps && + git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0) && + git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) { + write_bitmaps = 0; } if (pack_kept_objects < 0) pack_kept_objects = write_bitmaps > 0; @@ -725,8 +729,12 @@ int cmd_repack(int argc, const char **argv, const char *prefix) update_server_info(0); remove_temporary_files(); - if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) - write_midx_file(get_object_directory(), NULL, 0); + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX, 0)) { + unsigned flags = 0; + if (git_env_bool(GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP, 0)) + flags |= MIDX_WRITE_BITMAP | MIDX_WRITE_REV_INDEX; + write_midx_file(get_object_directory(), NULL, flags); + } string_list_clear(&names, 0); string_list_clear(&rollback, 0); 
diff --git a/ci/run-build-and-tests.sh b/ci/run-build-and-tests.sh index f3aba5d6cb..cc62616d80 100755 --- a/ci/run-build-and-tests.sh +++ b/ci/run-build-and-tests.sh @@ -28,6 +28,7 @@ linux-gcc) export GIT_TEST_COMMIT_GRAPH=1 export GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=1 export GIT_TEST_MULTI_PACK_INDEX=1 + export GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=1 export GIT_TEST_ADD_I_USE_BUILTIN=1 export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=master export GIT_TEST_WRITE_REV_INDEX=1 @@ -561,7 +561,7 @@ static struct cmd_struct commands[] = { { "merge-tree", cmd_merge_tree, RUN_SETUP | NO_PARSEOPT }, { "mktag", cmd_mktag, RUN_SETUP | NO_PARSEOPT }, { "mktree", cmd_mktree, RUN_SETUP }, - { "multi-pack-index", cmd_multi_pack_index, RUN_SETUP_GENTLY }, + { "multi-pack-index", cmd_multi_pack_index, RUN_SETUP }, { "mv", cmd_mv, RUN_SETUP | NEED_WORK_TREE }, { "name-rev", cmd_name_rev, RUN_SETUP }, { "notes", cmd_notes, RUN_SETUP }, @@ -13,6 +13,10 @@ #include "repository.h" #include "chunk-format.h" #include "pack.h" +#include "pack-bitmap.h" +#include "refs.h" +#include "revision.h" +#include "list-objects.h" #define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */ #define MIDX_VERSION 1 @@ -48,12 +52,12 @@ static uint8_t oid_version(void) } } -static const unsigned char *get_midx_checksum(struct multi_pack_index *m) +const unsigned char *get_midx_checksum(struct multi_pack_index *m) { return m->data + m->data_len - the_hash_algo->rawsz; } -static char *get_midx_filename(const char *object_dir) +char *get_midx_filename(const char *object_dir) { return xstrfmt("%s/pack/multi-pack-index", object_dir); } @@ -195,6 +199,8 @@ void close_midx(struct multi_pack_index *m) if (!m) return; + close_midx(m->next); + munmap((unsigned char *)m->data, m->data_len); for (i = 0; i < m->num_packs; i++) { @@ -203,6 +209,7 @@ void close_midx(struct multi_pack_index *m) } FREE_AND_NULL(m->packs); FREE_AND_NULL(m->pack_names); + free(m); } int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, 
uint32_t pack_int_id) @@ -882,7 +889,7 @@ static void write_midx_reverse_index(char *midx_name, unsigned char *midx_hash, strbuf_release(&buf); } -static void clear_midx_files_ext(struct repository *r, const char *ext, +static void clear_midx_files_ext(const char *object_dir, const char *ext, unsigned char *keep_hash); static int midx_checksum_valid(struct multi_pack_index *m) @@ -890,7 +897,167 @@ static int midx_checksum_valid(struct multi_pack_index *m) return hashfile_checksum_valid(m->data, m->data_len); } -static int write_midx_internal(const char *object_dir, struct multi_pack_index *m, +static void prepare_midx_packing_data(struct packing_data *pdata, + struct write_midx_context *ctx) +{ + uint32_t i; + + memset(pdata, 0, sizeof(struct packing_data)); + prepare_packing_data(the_repository, pdata); + + for (i = 0; i < ctx->entries_nr; i++) { + struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]]; + struct object_entry *to = packlist_alloc(pdata, &from->oid); + + oe_set_in_pack(pdata, to, + ctx->info[ctx->pack_perm[from->pack_int_id]].p); + } +} + +static int add_ref_to_pending(const char *refname, + const struct object_id *oid, + int flag, void *cb_data) +{ + struct rev_info *revs = (struct rev_info*)cb_data; + struct object *object; + + if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { + warning("symbolic ref is dangling: %s", refname); + return 0; + } + + object = parse_object_or_die(oid, refname); + if (object->type != OBJ_COMMIT) + return 0; + + add_pending_object(revs, object, ""); + if (bitmap_is_preferred_refname(revs->repo, refname)) + object->flags |= NEEDS_BITMAP; + return 0; +} + +struct bitmap_commit_cb { + struct commit **commits; + size_t commits_nr, commits_alloc; + + struct write_midx_context *ctx; +}; + +static const struct object_id *bitmap_oid_access(size_t index, + const void *_entries) +{ + const struct pack_midx_entry *entries = _entries; + return &entries[index].oid; +} + +static void bitmap_show_commit(struct commit 
*commit, void *_data) +{ + struct bitmap_commit_cb *data = _data; + int pos = oid_pos(&commit->object.oid, data->ctx->entries, + data->ctx->entries_nr, + bitmap_oid_access); + if (pos < 0) + return; + + ALLOC_GROW(data->commits, data->commits_nr + 1, data->commits_alloc); + data->commits[data->commits_nr++] = commit; +} + +static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p, + struct write_midx_context *ctx) +{ + struct rev_info revs; + struct bitmap_commit_cb cb = {0}; + + cb.ctx = ctx; + + repo_init_revisions(the_repository, &revs, NULL); + setup_revisions(0, NULL, &revs, NULL); + for_each_ref(add_ref_to_pending, &revs); + + /* + * Skipping promisor objects here is intentional, since it only excludes + * them from the list of reachable commits that we want to select from + * when computing the selection of MIDX'd commits to receive bitmaps. + * + * Reachability bitmaps do require that their objects be closed under + * reachability, but fetching any objects missing from promisors at this + * point is too late. But, if one of those objects can be reached from + * an another object that is included in the bitmap, then we will + * complain later that we don't have reachability closure (and fail + * appropriately). 
+ */ + fetch_if_missing = 0; + revs.exclude_promisor_objects = 1; + + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + + traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb); + if (indexed_commits_nr_p) + *indexed_commits_nr_p = cb.commits_nr; + + return cb.commits; +} + +static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, + struct write_midx_context *ctx, + unsigned flags) +{ + struct packing_data pdata; + struct pack_idx_entry **index; + struct commit **commits = NULL; + uint32_t i, commits_nr; + char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); + int ret; + + prepare_midx_packing_data(&pdata, ctx); + + commits = find_commits_for_midx_bitmap(&commits_nr, ctx); + + /* + * Build the MIDX-order index based on pdata.objects (which is already + * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of + * this order). + */ + ALLOC_ARRAY(index, pdata.nr_objects); + for (i = 0; i < pdata.nr_objects; i++) + index[i] = &pdata.objects[i].idx; + + bitmap_writer_show_progress(flags & MIDX_PROGRESS); + bitmap_writer_build_type_index(&pdata, index, pdata.nr_objects); + + /* + * bitmap_writer_finish expects objects in lex order, but pack_order + * gives us exactly that. use it directly instead of re-sorting the + * array. + * + * This changes the order of objects in 'index' between + * bitmap_writer_build_type_index and bitmap_writer_finish. + * + * The same re-ordering takes place in the single-pack bitmap code via + * write_idx_file(), which is called by finish_tmp_packfile(), which + * happens between bitmap_writer_build_type_index() and + * bitmap_writer_finish(). 
+ */ + for (i = 0; i < pdata.nr_objects; i++) + index[ctx->pack_order[i]] = &pdata.objects[i].idx; + + bitmap_writer_select_commits(commits, commits_nr, -1); + ret = bitmap_writer_build(&pdata); + if (ret < 0) + goto cleanup; + + bitmap_writer_set_checksum(midx_hash); + bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, 0); + +cleanup: + free(index); + free(bitmap_name); + return ret; +} + +static int write_midx_internal(const char *object_dir, struct string_list *packs_to_drop, const char *preferred_pack_name, unsigned flags) @@ -901,20 +1068,26 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * struct hashfile *f = NULL; struct lock_file lk; struct write_midx_context ctx = { 0 }; + struct multi_pack_index *cur; int pack_name_concat_len = 0; int dropped_packs = 0; int result = 0; struct chunkfile *cf; + /* Ensure the given object_dir is local, or a known alternate. */ + find_odb(the_repository, object_dir); + midx_name = get_midx_filename(object_dir); if (safe_create_leading_directories(midx_name)) die_errno(_("unable to create leading directories of %s"), midx_name); - if (m) - ctx.m = m; - else - ctx.m = load_multi_pack_index(object_dir, 1); + for (cur = get_multi_pack_index(the_repository); cur; cur = cur->next) { + if (!strcmp(object_dir, cur->object_dir)) { + ctx.m = cur; + break; + } + } if (ctx.m && !midx_checksum_valid(ctx.m)) { warning(_("ignoring existing multi-pack-index; checksum mismatch")); @@ -932,8 +1105,27 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * ctx.info[ctx.nr].orig_pack_int_id = i; ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]); - ctx.info[ctx.nr].p = NULL; + ctx.info[ctx.nr].p = ctx.m->packs[i]; ctx.info[ctx.nr].expired = 0; + + if (flags & MIDX_WRITE_REV_INDEX) { + /* + * If generating a reverse index, need to have + * packed_git's loaded to compare their + * mtimes and object count. 
+ */ + if (prepare_midx_pack(the_repository, ctx.m, i)) { + error(_("could not load pack")); + result = 1; + goto cleanup; + } + + if (open_pack_index(ctx.m->packs[i])) + die(_("could not open index for %s"), + ctx.m->packs[i]->pack_name); + ctx.info[ctx.nr].p = ctx.m->packs[i]; + } + ctx.nr++; } } @@ -947,18 +1139,89 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx); stop_progress(&ctx.progress); - if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) - goto cleanup; + if (ctx.m && ctx.nr == ctx.m->num_packs && !packs_to_drop) { + struct bitmap_index *bitmap_git; + int bitmap_exists; + int want_bitmap = flags & MIDX_WRITE_BITMAP; + + bitmap_git = prepare_midx_bitmap_git(ctx.m); + bitmap_exists = bitmap_git && bitmap_is_midx(bitmap_git); + free_bitmap_index(bitmap_git); + + if (bitmap_exists || !want_bitmap) { + /* + * The correct MIDX already exists, and so does a + * corresponding bitmap (or one wasn't requested). 
+ */ + if (!want_bitmap) + clear_midx_files_ext(object_dir, ".bitmap", + NULL); + goto cleanup; + } + } - ctx.preferred_pack_idx = -1; if (preferred_pack_name) { + int found = 0; for (i = 0; i < ctx.nr; i++) { if (!cmp_idx_or_pack_name(preferred_pack_name, ctx.info[i].pack_name)) { ctx.preferred_pack_idx = i; + found = 1; break; } } + + if (!found) + warning(_("unknown preferred pack: '%s'"), + preferred_pack_name); + } else if (ctx.nr && + (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))) { + struct packed_git *oldest = ctx.info[ctx.preferred_pack_idx].p; + ctx.preferred_pack_idx = 0; + + if (packs_to_drop && packs_to_drop->nr) + BUG("cannot write a MIDX bitmap during expiration"); + + /* + * set a preferred pack when writing a bitmap to ensure that + * the pack from which the first object is selected in pseudo + * pack-order has all of its objects selected from that pack + * (and not another pack containing a duplicate) + */ + for (i = 1; i < ctx.nr; i++) { + struct packed_git *p = ctx.info[i].p; + + if (!oldest->num_objects || p->mtime < oldest->mtime) { + oldest = p; + ctx.preferred_pack_idx = i; + } + } + + if (!oldest->num_objects) { + /* + * If all packs are empty; unset the preferred index. + * This is acceptable since there will be no duplicate + * objects to resolve, so the preferred value doesn't + * matter. 
+ */ + ctx.preferred_pack_idx = -1; + } + } else { + /* + * otherwise don't mark any pack as preferred to avoid + * interfering with expiration logic below + */ + ctx.preferred_pack_idx = -1; + } + + if (ctx.preferred_pack_idx > -1) { + struct packed_git *preferred = ctx.info[ctx.preferred_pack_idx].p; + if (!preferred->num_objects) { + error(_("cannot select preferred pack %s with no objects"), + preferred->pack_name); + result = 1; + goto cleanup; + } } ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr, @@ -1029,11 +1292,7 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * ctx.info, ctx.nr, sizeof(*ctx.info), idx_or_pack_name_cmp); - - if (!preferred) - warning(_("unknown preferred pack: '%s'"), - preferred_pack_name); - else { + if (preferred) { uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id]; if (perm == PACK_EXPIRED) warning(_("preferred pack '%s' is expired"), @@ -1048,9 +1307,6 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR); f = hashfd(get_lock_file_fd(&lk), get_lock_file_path(&lk)); - if (ctx.m) - close_midx(ctx.m); - if (ctx.nr - dropped_packs == 0) { error(_("no pack files to index.")); result = 1; @@ -1081,15 +1337,27 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM); free_chunkfile(cf); - if (flags & MIDX_WRITE_REV_INDEX) + if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP)) ctx.pack_order = midx_pack_order(&ctx); if (flags & MIDX_WRITE_REV_INDEX) write_midx_reverse_index(midx_name, midx_hash, &ctx); - clear_midx_files_ext(the_repository, ".rev", midx_hash); + if (flags & MIDX_WRITE_BITMAP) { + if (write_midx_bitmap(midx_name, midx_hash, &ctx, flags) < 0) { + error(_("could not write multi-pack bitmap")); + result = 1; + goto cleanup; + } + } + + if (ctx.m) + 
close_object_store(the_repository->objects); commit_lock_file(&lk); + clear_midx_files_ext(object_dir, ".bitmap", midx_hash); + clear_midx_files_ext(object_dir, ".rev", midx_hash); + cleanup: for (i = 0; i < ctx.nr; i++) { if (ctx.info[i].p) { @@ -1104,6 +1372,7 @@ cleanup: free(ctx.pack_perm); free(ctx.pack_order); free(midx_name); + return result; } @@ -1111,8 +1380,7 @@ int write_midx_file(const char *object_dir, const char *preferred_pack_name, unsigned flags) { - return write_midx_internal(object_dir, NULL, NULL, preferred_pack_name, - flags); + return write_midx_internal(object_dir, NULL, preferred_pack_name, flags); } struct clear_midx_data { @@ -1135,7 +1403,7 @@ static void clear_midx_file_ext(const char *full_path, size_t full_path_len, die_errno(_("failed to remove %s"), full_path); } -static void clear_midx_files_ext(struct repository *r, const char *ext, +static void clear_midx_files_ext(const char *object_dir, const char *ext, unsigned char *keep_hash) { struct clear_midx_data data; @@ -1146,7 +1414,7 @@ static void clear_midx_files_ext(struct repository *r, const char *ext, hash_to_hex(keep_hash), ext); data.ext = ext; - for_each_file_in_pack_dir(r->objects->odb->path, + for_each_file_in_pack_dir(object_dir, clear_midx_file_ext, &data); @@ -1165,7 +1433,8 @@ void clear_midx_file(struct repository *r) if (remove_path(midx)) die(_("failed to clear multi-pack-index at %s"), midx); - clear_midx_files_ext(r, ".rev", NULL); + clear_midx_files_ext(r->objects->odb->path, ".bitmap", NULL); + clear_midx_files_ext(r->objects->odb->path, ".rev", NULL); free(midx); } @@ -1390,8 +1659,10 @@ int expire_midx_packs(struct repository *r, const char *object_dir, unsigned fla free(count); - if (packs_to_drop.nr) - result = write_midx_internal(object_dir, m, &packs_to_drop, NULL, flags); + if (packs_to_drop.nr) { + result = write_midx_internal(object_dir, &packs_to_drop, NULL, flags); + m = NULL; + } string_list_clear(&packs_to_drop, 0); return result; @@ -1580,7 +1851,7 
@@ int midx_repack(struct repository *r, const char *object_dir, size_t batch_size, goto cleanup; } - result = write_midx_internal(object_dir, m, NULL, NULL, flags); + result = write_midx_internal(object_dir, NULL, NULL, flags); m = NULL; cleanup: @@ -8,6 +8,8 @@ struct pack_entry; struct repository; #define GIT_TEST_MULTI_PACK_INDEX "GIT_TEST_MULTI_PACK_INDEX" +#define GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP \ + "GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP" struct multi_pack_index { struct multi_pack_index *next; @@ -41,7 +43,10 @@ struct multi_pack_index { #define MIDX_PROGRESS (1 << 0) #define MIDX_WRITE_REV_INDEX (1 << 1) +#define MIDX_WRITE_BITMAP (1 << 2) +const unsigned char *get_midx_checksum(struct multi_pack_index *m); +char *get_midx_filename(const char *object_dir); char *get_midx_rev_filename(struct multi_pack_index *m); struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local); diff --git a/object-file.c b/object-file.c index a8be899481..a4d720b4f5 100644 --- a/object-file.c +++ b/object-file.c @@ -820,6 +820,27 @@ out: return ref_git; } +struct object_directory *find_odb(struct repository *r, const char *obj_dir) +{ + struct object_directory *odb; + char *obj_dir_real = real_pathdup(obj_dir, 1); + struct strbuf odb_path_real = STRBUF_INIT; + + prepare_alt_odb(r); + for (odb = r->objects->odb; odb; odb = odb->next) { + strbuf_realpath(&odb_path_real, odb->path, 1); + if (!strcmp(obj_dir_real, odb_path_real.buf)) + break; + } + + free(obj_dir_real); + strbuf_release(&odb_path_real); + + if (!odb) + die(_("could not find object directory matching %s"), obj_dir); + return odb; +} + static void fill_alternate_refs_command(struct child_process *cmd, const char *repo_path) { diff --git a/object-store.h b/object-store.h index b4dc6668aa..ebc55274e6 100644 --- a/object-store.h +++ b/object-store.h @@ -38,6 +38,7 @@ KHASH_INIT(odb_path_map, const char * /* key: odb_path */, void prepare_alt_odb(struct repository *r); char 
*compute_alternate_path(const char *path, struct strbuf *err); +struct object_directory *find_odb(struct repository *r, const char *obj_dir); typedef int alt_odb_fn(struct object_directory *, void *); int foreach_alt_odb(alt_odb_fn, void*); typedef void alternate_ref_fn(const struct object_id *oid, void *); diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 88d9e696a5..9c55c1531e 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -48,7 +48,7 @@ void bitmap_writer_show_progress(int show) } /** - * Build the initial type index for the packfile + * Build the initial type index for the packfile or multi-pack-index */ void bitmap_writer_build_type_index(struct packing_data *to_pack, struct pack_idx_entry **index, @@ -125,15 +125,20 @@ static inline void push_bitmapped_commit(struct commit *commit) writer.selected_nr++; } -static uint32_t find_object_pos(const struct object_id *oid) +static uint32_t find_object_pos(const struct object_id *oid, int *found) { struct object_entry *entry = packlist_find(writer.to_pack, oid); if (!entry) { - die("Failed to write bitmap index. Packfile doesn't have full closure " + if (found) + *found = 0; + warning("Failed to write bitmap index. Packfile doesn't have full closure " "(object %s is missing)", oid_to_hex(oid)); + return 0; } + if (found) + *found = 1; return oe_in_pack_pos(writer.to_pack, entry); } @@ -331,9 +336,10 @@ static void bitmap_builder_clear(struct bitmap_builder *bb) bb->commits_nr = bb->commits_alloc = 0; } -static void fill_bitmap_tree(struct bitmap *bitmap, - struct tree *tree) +static int fill_bitmap_tree(struct bitmap *bitmap, + struct tree *tree) { + int found; uint32_t pos; struct tree_desc desc; struct name_entry entry; @@ -342,9 +348,11 @@ static void fill_bitmap_tree(struct bitmap *bitmap, * If our bit is already set, then there is nothing to do. Both this * tree and all of its children will be set. 
*/ - pos = find_object_pos(&tree->object.oid); + pos = find_object_pos(&tree->object.oid, &found); + if (!found) + return -1; if (bitmap_get(bitmap, pos)) - return; + return 0; bitmap_set(bitmap, pos); if (parse_tree(tree) < 0) @@ -355,11 +363,15 @@ static void fill_bitmap_tree(struct bitmap *bitmap, while (tree_entry(&desc, &entry)) { switch (object_type(entry.mode)) { case OBJ_TREE: - fill_bitmap_tree(bitmap, - lookup_tree(the_repository, &entry.oid)); + if (fill_bitmap_tree(bitmap, + lookup_tree(the_repository, &entry.oid)) < 0) + return -1; break; case OBJ_BLOB: - bitmap_set(bitmap, find_object_pos(&entry.oid)); + pos = find_object_pos(&entry.oid, &found); + if (!found) + return -1; + bitmap_set(bitmap, pos); break; default: /* Gitlink, etc; not reachable */ @@ -368,15 +380,18 @@ static void fill_bitmap_tree(struct bitmap *bitmap, } free_tree_buffer(tree); + return 0; } -static void fill_bitmap_commit(struct bb_commit *ent, - struct commit *commit, - struct prio_queue *queue, - struct prio_queue *tree_queue, - struct bitmap_index *old_bitmap, - const uint32_t *mapping) +static int fill_bitmap_commit(struct bb_commit *ent, + struct commit *commit, + struct prio_queue *queue, + struct prio_queue *tree_queue, + struct bitmap_index *old_bitmap, + const uint32_t *mapping) { + int found; + uint32_t pos; if (!ent->bitmap) ent->bitmap = bitmap_new(); @@ -401,11 +416,16 @@ static void fill_bitmap_commit(struct bb_commit *ent, * Mark ourselves and queue our tree. The commit * walk ensures we cover all parents. 
*/ - bitmap_set(ent->bitmap, find_object_pos(&c->object.oid)); + pos = find_object_pos(&c->object.oid, &found); + if (!found) + return -1; + bitmap_set(ent->bitmap, pos); prio_queue_put(tree_queue, get_commit_tree(c)); for (p = c->parents; p; p = p->next) { - int pos = find_object_pos(&p->item->object.oid); + pos = find_object_pos(&p->item->object.oid, &found); + if (!found) + return -1; if (!bitmap_get(ent->bitmap, pos)) { bitmap_set(ent->bitmap, pos); prio_queue_put(queue, p->item); @@ -413,8 +433,12 @@ static void fill_bitmap_commit(struct bb_commit *ent, } } - while (tree_queue->nr) - fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue)); + while (tree_queue->nr) { + if (fill_bitmap_tree(ent->bitmap, + prio_queue_get(tree_queue)) < 0) + return -1; + } + return 0; } static void store_selected(struct bb_commit *ent, struct commit *commit) @@ -432,7 +456,7 @@ static void store_selected(struct bb_commit *ent, struct commit *commit) kh_value(writer.bitmaps, hash_pos) = stored; } -void bitmap_writer_build(struct packing_data *to_pack) +int bitmap_writer_build(struct packing_data *to_pack) { struct bitmap_builder bb; size_t i; @@ -441,6 +465,7 @@ void bitmap_writer_build(struct packing_data *to_pack) struct prio_queue tree_queue = { NULL }; struct bitmap_index *old_bitmap; uint32_t *mapping; + int closed = 1; /* until proven otherwise */ writer.bitmaps = kh_init_oid_map(); writer.to_pack = to_pack; @@ -463,8 +488,11 @@ void bitmap_writer_build(struct packing_data *to_pack) struct commit *child; int reused = 0; - fill_bitmap_commit(ent, commit, &queue, &tree_queue, - old_bitmap, mapping); + if (fill_bitmap_commit(ent, commit, &queue, &tree_queue, + old_bitmap, mapping) < 0) { + closed = 0; + break; + } if (ent->selected) { store_selected(ent, commit); @@ -492,6 +520,7 @@ void bitmap_writer_build(struct packing_data *to_pack) clear_prio_queue(&queue); clear_prio_queue(&tree_queue); bitmap_builder_clear(&bb); + free_bitmap_index(old_bitmap); free(mapping); 
trace2_region_leave("pack-bitmap-write", "building_bitmaps_total", @@ -499,7 +528,9 @@ void bitmap_writer_build(struct packing_data *to_pack) stop_progress(&writer.progress); - compute_xor_offsets(); + if (closed) + compute_xor_offsets(); + return closed ? 0 : -1; } /** diff --git a/pack-bitmap.c b/pack-bitmap.c index d999616c9e..8504110a4d 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -13,6 +13,7 @@ #include "repository.h" #include "object-store.h" #include "list-objects-filter-options.h" +#include "midx.h" #include "config.h" /* @@ -35,8 +36,15 @@ struct stored_bitmap { * the active bitmap index is the largest one. */ struct bitmap_index { - /* Packfile to which this bitmap index belongs to */ + /* + * The pack or multi-pack index (MIDX) that this bitmap index belongs + * to. + * + * Exactly one of these must be non-NULL; this specifies the object + * order used to interpret this bitmap. + */ struct packed_git *pack; + struct multi_pack_index *midx; /* * Mark the first `reuse_objects` in the packfile as reused: @@ -71,6 +79,9 @@ struct bitmap_index { /* If not NULL, this is a name-hash cache pointing into map. */ uint32_t *hashes; + /* The checksum of the packfile or MIDX; points into map. */ + const unsigned char *checksum; + /* * Extended index. 
* @@ -136,6 +147,13 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) return b; } +static uint32_t bitmap_num_objects(struct bitmap_index *index) +{ + if (index->midx) + return index->midx->num_objects; + return index->pack->num_objects; +} + static int load_bitmap_header(struct bitmap_index *index) { struct bitmap_disk_header *header = (void *)index->map; @@ -154,7 +172,7 @@ static int load_bitmap_header(struct bitmap_index *index) /* Parse known bitmap format options */ { uint32_t flags = ntohs(header->options); - size_t cache_size = st_mult(index->pack->num_objects, sizeof(uint32_t)); + size_t cache_size = st_mult(bitmap_num_objects(index), sizeof(uint32_t)); unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz; if ((flags & BITMAP_OPT_FULL_DAG) == 0) @@ -170,6 +188,7 @@ static int load_bitmap_header(struct bitmap_index *index) } index->entry_count = ntohl(header->entry_count); + index->checksum = header->checksum; index->map_pos += header_size; return 0; } @@ -218,6 +237,15 @@ static inline uint8_t read_u8(const unsigned char *buffer, size_t *pos) #define MAX_XOR_OFFSET 160 +static int nth_bitmap_object_oid(struct bitmap_index *index, + struct object_id *oid, + uint32_t n) +{ + if (index->midx) + return nth_midxed_object_oid(oid, index->midx, n) ? 
0 : -1; + return nth_packed_object_id(oid, index->pack, n); +} + static int load_bitmap_entries_v1(struct bitmap_index *index) { uint32_t i; @@ -237,7 +265,7 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) xor_offset = read_u8(index->map, &index->map_pos); flags = read_u8(index->map, &index->map_pos); - if (nth_packed_object_id(&oid, index->pack, commit_idx_pos) < 0) + if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0) return error("corrupt ewah bitmap: commit index %u out of range", (unsigned)commit_idx_pos); @@ -262,7 +290,14 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) return 0; } -static char *pack_bitmap_filename(struct packed_git *p) +char *midx_bitmap_filename(struct multi_pack_index *midx) +{ + return xstrfmt("%s-%s.bitmap", + get_midx_filename(midx->object_dir), + hash_to_hex(get_midx_checksum(midx))); +} + +char *pack_bitmap_filename(struct packed_git *p) { size_t len; @@ -271,6 +306,57 @@ static char *pack_bitmap_filename(struct packed_git *p) return xstrfmt("%.*s.bitmap", (int)len, p->pack_name); } +static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, + struct multi_pack_index *midx) +{ + struct stat st; + char *idx_name = midx_bitmap_filename(midx); + int fd = git_open(idx_name); + + free(idx_name); + + if (fd < 0) + return -1; + + if (fstat(fd, &st)) { + close(fd); + return -1; + } + + if (bitmap_git->pack || bitmap_git->midx) { + /* ignore extra bitmap file; we can only handle one */ + warning("ignoring extra bitmap file: %s", + get_midx_filename(midx->object_dir)); + close(fd); + return -1; + } + + bitmap_git->midx = midx; + bitmap_git->map_size = xsize_t(st.st_size); + bitmap_git->map_pos = 0; + bitmap_git->map = xmmap(NULL, bitmap_git->map_size, PROT_READ, + MAP_PRIVATE, fd, 0); + close(fd); + + if (load_bitmap_header(bitmap_git) < 0) + goto cleanup; + + if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) + goto cleanup; + + if (load_midx_revindex(bitmap_git->midx) < 0) { 
+ warning(_("multi-pack bitmap is missing required reverse index")); + goto cleanup; + } + return 0; + +cleanup: + munmap(bitmap_git->map, bitmap_git->map_size); + bitmap_git->map_size = 0; + bitmap_git->map = NULL; + return -1; +} + static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git *packfile) { int fd; @@ -292,7 +378,8 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git return -1; } - if (bitmap_git->pack) { + if (bitmap_git->pack || bitmap_git->midx) { + /* ignore extra bitmap file; we can only handle one */ warning("ignoring extra bitmap file: %s", packfile->pack_name); close(fd); return -1; @@ -319,13 +406,39 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git return 0; } -static int load_pack_bitmap(struct bitmap_index *bitmap_git) +static int load_reverse_index(struct bitmap_index *bitmap_git) +{ + if (bitmap_is_midx(bitmap_git)) { + uint32_t i; + int ret; + + /* + * The multi-pack-index's .rev file is already loaded via + * open_pack_bitmap_1(). + * + * But we still need to open the individual pack .rev files, + * since we will need to make use of them in pack-objects. 
+ */ + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) + die(_("load_reverse_index: could not open pack")); + ret = load_pack_revindex(bitmap_git->midx->packs[i]); + if (ret) + return ret; + } + return 0; + } + return load_pack_revindex(bitmap_git->pack); +} + +static int load_bitmap(struct bitmap_index *bitmap_git) { assert(bitmap_git->map); bitmap_git->bitmaps = kh_init_oid_map(); bitmap_git->ext_index.positions = kh_init_oid_pos(); - if (load_pack_revindex(bitmap_git->pack)) + + if (load_reverse_index(bitmap_git)) goto failed; if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) || @@ -369,11 +482,46 @@ static int open_pack_bitmap(struct repository *r, return ret; } +static int open_midx_bitmap(struct repository *r, + struct bitmap_index *bitmap_git) +{ + struct multi_pack_index *midx; + + assert(!bitmap_git->map); + + for (midx = get_multi_pack_index(r); midx; midx = midx->next) { + if (!open_midx_bitmap_1(bitmap_git, midx)) + return 0; + } + return -1; +} + +static int open_bitmap(struct repository *r, + struct bitmap_index *bitmap_git) +{ + assert(!bitmap_git->map); + + if (!open_midx_bitmap(r, bitmap_git)) + return 0; + return open_pack_bitmap(r, bitmap_git); +} + struct bitmap_index *prepare_bitmap_git(struct repository *r) { struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); - if (!open_pack_bitmap(r, bitmap_git) && !load_pack_bitmap(bitmap_git)) + if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git)) + return bitmap_git; + + free_bitmap_index(bitmap_git); + return NULL; +} + +struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx) +{ + struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); + + if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(bitmap_git)) return bitmap_git; free_bitmap_index(bitmap_git); @@ -404,7 +552,7 @@ static inline int bitmap_position_extended(struct bitmap_index *bitmap_git, if (pos < kh_end(positions)) 
{ int bitmap_pos = kh_value(positions, pos); - return bitmap_pos + bitmap_git->pack->num_objects; + return bitmap_pos + bitmap_num_objects(bitmap_git); } return -1; @@ -423,10 +571,26 @@ static inline int bitmap_position_packfile(struct bitmap_index *bitmap_git, return pos; } +static int bitmap_position_midx(struct bitmap_index *bitmap_git, + const struct object_id *oid) +{ + uint32_t want, got; + if (!bsearch_midx(oid, bitmap_git->midx, &want)) + return -1; + + if (midx_to_pack_pos(bitmap_git->midx, want, &got) < 0) + return -1; + return got; +} + static int bitmap_position(struct bitmap_index *bitmap_git, const struct object_id *oid) { - int pos = bitmap_position_packfile(bitmap_git, oid); + int pos; + if (bitmap_is_midx(bitmap_git)) + pos = bitmap_position_midx(bitmap_git, oid); + else + pos = bitmap_position_packfile(bitmap_git, oid); return (pos >= 0) ? pos : bitmap_position_extended(bitmap_git, oid); } @@ -456,7 +620,7 @@ static int ext_index_add_object(struct bitmap_index *bitmap_git, bitmap_pos = kh_value(eindex->positions, hash_pos); } - return bitmap_pos + bitmap_git->pack->num_objects; + return bitmap_pos + bitmap_num_objects(bitmap_git); } struct bitmap_show_data { @@ -673,7 +837,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { struct object *obj; - if (!bitmap_get(objects, bitmap_git->pack->num_objects + i)) + if (!bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) continue; obj = eindex->objects[i]; @@ -737,6 +901,7 @@ static void show_objects_for_type( continue; for (offset = 0; offset < BITS_IN_EWORD; ++offset) { + struct packed_git *pack; struct object_id oid; uint32_t hash = 0, index_pos; off_t ofs; @@ -746,14 +911,28 @@ static void show_objects_for_type( offset += ewah_bit_ctz64(word >> offset); - index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); - ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); - nth_packed_object_id(&oid, bitmap_git->pack, index_pos); + if 
(bitmap_is_midx(bitmap_git)) { + struct multi_pack_index *m = bitmap_git->midx; + uint32_t pack_id; + + index_pos = pack_pos_to_midx(m, pos + offset); + ofs = nth_midxed_offset(m, index_pos); + nth_midxed_object_oid(&oid, m, index_pos); + + pack_id = nth_midxed_pack_int_id(m, index_pos); + pack = bitmap_git->midx->packs[pack_id]; + } else { + index_pos = pack_pos_to_index(bitmap_git->pack, pos + offset); + ofs = pack_pos_to_offset(bitmap_git->pack, pos + offset); + nth_bitmap_object_oid(bitmap_git, &oid, index_pos); + + pack = bitmap_git->pack; + } if (bitmap_git->hashes) hash = get_be32(bitmap_git->hashes + index_pos); - show_reach(&oid, object_type, 0, hash, bitmap_git->pack, ofs); + show_reach(&oid, object_type, 0, hash, pack, ofs); } } } @@ -765,8 +944,13 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, struct object *object = roots->item; roots = roots->next; - if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0) - return 1; + if (bitmap_is_midx(bitmap_git)) { + if (bsearch_midx(&object->oid, bitmap_git->midx, NULL)) + return 1; + } else { + if (find_pack_entry_one(object->oid.hash, bitmap_git->pack) > 0) + return 1; + } } return 0; @@ -832,7 +1016,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, * them individually. 
*/ for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_git->pack->num_objects; + uint32_t pos = i + bitmap_num_objects(bitmap_git); if (eindex->objects[i]->type == type && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos)) @@ -853,23 +1037,35 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, uint32_t pos) { - struct packed_git *pack = bitmap_git->pack; unsigned long size; struct object_info oi = OBJECT_INFO_INIT; oi.sizep = &size; - if (pos < pack->num_objects) { - off_t ofs = pack_pos_to_offset(pack, pos); + if (pos < bitmap_num_objects(bitmap_git)) { + struct packed_git *pack; + off_t ofs; + + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, pos); + uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos); + + pack = bitmap_git->midx->packs[pack_id]; + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); + } else { + pack = bitmap_git->pack; + ofs = pack_pos_to_offset(pack, pos); + } + if (packed_object_info(the_repository, pack, ofs, &oi) < 0) { struct object_id oid; - nth_packed_object_id(&oid, pack, - pack_pos_to_index(pack, pos)); + nth_bitmap_object_oid(bitmap_git, &oid, + pack_pos_to_index(pack, pos)); die(_("unable to get size of %s"), oid_to_hex(&oid)); } } else { struct eindex *eindex = &bitmap_git->ext_index; - struct object *obj = eindex->objects[pos - pack->num_objects]; + struct object *obj = eindex->objects[pos - bitmap_num_objects(bitmap_git)]; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) die(_("unable to get size of %s"), oid_to_hex(&obj->oid)); } @@ -911,7 +1107,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, } for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_git->pack->num_objects; + uint32_t pos = i + bitmap_num_objects(bitmap_git); if (eindex->objects[i]->type == OBJ_BLOB && bitmap_get(to_filter, pos) && 
!bitmap_get(tips, pos) && @@ -1041,7 +1237,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, /* try to open a bitmapped pack, but don't parse it yet * because we may not need to use it */ CALLOC_ARRAY(bitmap_git, 1); - if (open_pack_bitmap(revs->repo, bitmap_git) < 0) + if (open_bitmap(revs->repo, bitmap_git) < 0) goto cleanup; for (i = 0; i < revs->pending.nr; ++i) { @@ -1085,7 +1281,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, * from disk. this is the point of no return; after this the rev_list * becomes invalidated and we must perform the revwalk through bitmaps */ - if (load_pack_bitmap(bitmap_git) < 0) + if (load_bitmap(bitmap_git) < 0) goto cleanup; object_array_clear(&revs->pending); @@ -1128,22 +1324,49 @@ cleanup: return NULL; } -static void try_partial_reuse(struct bitmap_index *bitmap_git, - size_t pos, - struct bitmap *reuse, - struct pack_window **w_curs) +/* + * -1 means "stop trying further objects"; 0 means we may or may not have + * reused, but you can keep feeding bits. + */ +static int try_partial_reuse(struct packed_git *pack, + size_t pos, + struct bitmap *reuse, + struct pack_window **w_curs) { - off_t offset, header; + off_t offset, delta_obj_offset; enum object_type type; unsigned long size; - if (pos >= bitmap_git->pack->num_objects) - return; /* not actually in the pack */ + /* + * try_partial_reuse() is called either on (a) objects in the + * bitmapped pack (in the case of a single-pack bitmap) or (b) + * objects in the preferred pack of a multi-pack bitmap. + * Importantly, the latter can pretend as if only a single pack + * exists because: + * + * - The first pack->num_objects bits of a MIDX bitmap are + * reserved for the preferred pack, and + * + * - Ties due to duplicate objects are always resolved in + * favor of the preferred pack. + * + * Therefore we do not need to ever ask the MIDX for its copy of + * an object by OID, since it will always select it from the + * preferred pack. 
Likewise, the selected copy of the base + * object for any deltas will reside in the same pack. + * + * This means that we can reuse pos when looking up the bit in + * the reuse bitmap, too, since bits corresponding to the + * preferred pack precede all bits from other packs. + */ + + if (pos >= pack->num_objects) + return -1; /* not actually in the pack or MIDX preferred pack */ - offset = header = pack_pos_to_offset(bitmap_git->pack, pos); - type = unpack_object_header(bitmap_git->pack, w_curs, &offset, &size); + offset = delta_obj_offset = pack_pos_to_offset(pack, pos); + type = unpack_object_header(pack, w_curs, &offset, &size); if (type < 0) - return; /* broken packfile, punt */ + return -1; /* broken packfile, punt */ if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; @@ -1157,12 +1380,12 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, * and the normal slow path will complain about it in * more detail. */ - base_offset = get_delta_base(bitmap_git->pack, w_curs, - &offset, type, header); + base_offset = get_delta_base(pack, w_curs, &offset, type, + delta_obj_offset); if (!base_offset) - return; - if (offset_to_pack_pos(bitmap_git->pack, base_offset, &base_pos) < 0) - return; + return 0; + if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0) + return 0; /* * We assume delta dependencies always point backwards. This @@ -1174,7 +1397,7 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, * odd parameters. */ if (base_pos >= pos) - return; + return 0; /* * And finally, if we're not sending the base as part of our @@ -1185,13 +1408,22 @@ static void try_partial_reuse(struct bitmap_index *bitmap_git, * object_entry code path handle it. */ if (!bitmap_get(reuse, base_pos)) - return; + return 0; } /* * If we got here, then the object is OK to reuse. Mark it. 
*/ bitmap_set(reuse, pos); + return 0; +} + +static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) +{ + struct multi_pack_index *m = bitmap_git->midx; + if (!m) + BUG("midx_preferred_pack: requires non-empty MIDX"); + return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); } int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, @@ -1199,20 +1431,37 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, uint32_t *entries, struct bitmap **reuse_out) { + struct packed_git *pack; struct bitmap *result = bitmap_git->result; struct bitmap *reuse; struct pack_window *w_curs = NULL; size_t i = 0; uint32_t offset; + uint32_t objects_nr; assert(result); + load_reverse_index(bitmap_git); + + if (bitmap_is_midx(bitmap_git)) + pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; + else + pack = bitmap_git->pack; + objects_nr = pack->num_objects; + while (i < result->word_alloc && result->words[i] == (eword_t)~0) i++; - /* Don't mark objects not in the packfile */ - if (i > bitmap_git->pack->num_objects / BITS_IN_EWORD) - i = bitmap_git->pack->num_objects / BITS_IN_EWORD; + /* + * Don't mark objects not in the packfile or preferred pack. This bitmap + * marks objects eligible for reuse, but the pack-reuse code only + * understands how to reuse a single pack. Since the preferred pack is + * guaranteed to have all bases for its deltas (in a multi-pack bitmap), + * we use it instead of another pack. In single-pack bitmaps, the choice + * is made for us. 
+ */ + if (i > objects_nr / BITS_IN_EWORD) + i = objects_nr / BITS_IN_EWORD; reuse = bitmap_word_alloc(i); memset(reuse->words, 0xFF, i * sizeof(eword_t)); @@ -1226,10 +1475,23 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, break; offset += ewah_bit_ctz64(word >> offset); - try_partial_reuse(bitmap_git, pos + offset, reuse, &w_curs); + if (try_partial_reuse(pack, pos + offset, + reuse, &w_curs) < 0) { + /* + * try_partial_reuse indicated we couldn't reuse + * any bits, so there is no point in trying more + * bits in the current word, or any other words + * in result. + * + * Jump out of both loops to avoid future + * unnecessary calls to try_partial_reuse. + */ + goto done; + } } } +done: unuse_pack(&w_curs); *entries = bitmap_popcount(reuse); @@ -1243,7 +1505,7 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = bitmap_git->pack; + *packfile_out = pack; *reuse_out = reuse; return 0; } @@ -1296,7 +1558,7 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { if (eindex->objects[i]->type == type && - bitmap_get(objects, bitmap_git->pack->num_objects + i)) + bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) count++; } @@ -1325,10 +1587,52 @@ void count_bitmap_commit_list(struct bitmap_index *bitmap_git, struct bitmap_test_data { struct bitmap_index *bitmap_git; struct bitmap *base; + struct bitmap *commits; + struct bitmap *trees; + struct bitmap *blobs; + struct bitmap *tags; struct progress *prg; size_t seen; }; +static void test_bitmap_type(struct bitmap_test_data *tdata, + struct object *obj, int pos) +{ + enum object_type bitmap_type = OBJ_NONE; + int bitmaps_nr = 0; + + if (bitmap_get(tdata->commits, pos)) { + bitmap_type = OBJ_COMMIT; + bitmaps_nr++; + } + if (bitmap_get(tdata->trees, pos)) { + bitmap_type = OBJ_TREE; + bitmaps_nr++; + } + if (bitmap_get(tdata->blobs, 
pos)) { + bitmap_type = OBJ_BLOB; + bitmaps_nr++; + } + if (bitmap_get(tdata->tags, pos)) { + bitmap_type = OBJ_TAG; + bitmaps_nr++; + } + + if (bitmap_type == OBJ_NONE) + die("object %s not found in type bitmaps", + oid_to_hex(&obj->oid)); + + if (bitmaps_nr > 1) + die("object %s does not have a unique type", + oid_to_hex(&obj->oid)); + + if (bitmap_type != obj->type) + die("object %s: real type %s, expected: %s", + oid_to_hex(&obj->oid), + type_name(obj->type), + type_name(bitmap_type)); +} + static void test_show_object(struct object *object, const char *name, void *data) { @@ -1338,6 +1642,7 @@ static void test_show_object(struct object *object, const char *name, bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid); if (bitmap_pos < 0) die("Object not in bitmap: %s\n", oid_to_hex(&object->oid)); + test_bitmap_type(tdata, object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); display_progress(tdata->prg, ++tdata->seen); @@ -1352,6 +1657,7 @@ static void test_show_commit(struct commit *commit, void *data) &commit->object.oid); if (bitmap_pos < 0) die("Object not in bitmap: %s\n", oid_to_hex(&commit->object.oid)); + test_bitmap_type(tdata, &commit->object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); display_progress(tdata->prg, ++tdata->seen); @@ -1399,6 +1705,10 @@ void test_bitmap_walk(struct rev_info *revs) tdata.bitmap_git = bitmap_git; tdata.base = bitmap_new(); + tdata.commits = ewah_to_bitmap(bitmap_git->commits); + tdata.trees = ewah_to_bitmap(bitmap_git->trees); + tdata.blobs = ewah_to_bitmap(bitmap_git->blobs); + tdata.tags = ewah_to_bitmap(bitmap_git->tags); tdata.prg = start_progress("Verifying bitmap entries", result_popcnt); tdata.seen = 0; @@ -1469,15 +1779,26 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, uint32_t i, num_objects; uint32_t *reposition; - num_objects = bitmap_git->pack->num_objects; + if (!bitmap_is_midx(bitmap_git)) + load_reverse_index(bitmap_git); + else if 
(load_midx_revindex(bitmap_git->midx) < 0) + BUG("rebuild_existing_bitmaps: missing required rev-cache " + "extension"); + + num_objects = bitmap_num_objects(bitmap_git); CALLOC_ARRAY(reposition, num_objects); for (i = 0; i < num_objects; ++i) { struct object_id oid; struct object_entry *oe; - nth_packed_object_id(&oid, bitmap_git->pack, - pack_pos_to_index(bitmap_git->pack, i)); + if (bitmap_is_midx(bitmap_git)) + nth_midxed_object_oid(&oid, + bitmap_git->midx, + pack_pos_to_midx(bitmap_git->midx, i)); + else + nth_packed_object_id(&oid, bitmap_git->pack, + pack_pos_to_index(bitmap_git->pack, i)); oe = packlist_find(mapping, &oid); if (oe) @@ -1503,6 +1824,19 @@ void free_bitmap_index(struct bitmap_index *b) free(b->ext_index.hashes); bitmap_free(b->result); bitmap_free(b->haves); + if (bitmap_is_midx(b)) { + /* + * Multi-pack bitmaps need to have resources associated with + * their on-disk reverse indexes unmapped so that stale .rev and + * .bitmap files can be removed. + * + * Unlike pack-based bitmaps, multi-pack bitmaps can be read and + * written in the same 'git multi-pack-index write --bitmap' + * process. Close resources so they can be removed safely on + * platforms like Windows. 
+ */ + close_midx_revindex(b->midx); + } free(b); } @@ -1517,7 +1851,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, enum object_type object_type) { struct bitmap *result = bitmap_git->result; - struct packed_git *pack = bitmap_git->pack; off_t total = 0; struct ewah_iterator it; eword_t filter; @@ -1534,15 +1867,35 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, continue; for (offset = 0; offset < BITS_IN_EWORD; offset++) { - size_t pos; - if ((word >> offset) == 0) break; offset += ewah_bit_ctz64(word >> offset); - pos = base + offset; - total += pack_pos_to_offset(pack, pos + 1) - - pack_pos_to_offset(pack, pos); + + if (bitmap_is_midx(bitmap_git)) { + uint32_t pack_pos; + uint32_t midx_pos = pack_pos_to_midx(bitmap_git->midx, base + offset); + off_t offset = nth_midxed_offset(bitmap_git->midx, midx_pos); + + uint32_t pack_id = nth_midxed_pack_int_id(bitmap_git->midx, midx_pos); + struct packed_git *pack = bitmap_git->midx->packs[pack_id]; + + if (offset_to_pack_pos(pack, offset, &pack_pos) < 0) { + struct object_id oid; + nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos); + + die(_("could not find %s in pack %s at offset %"PRIuMAX), + oid_to_hex(&oid), + pack->pack_name, + (uintmax_t)offset); + } + + total += pack_pos_to_offset(pack, pack_pos + 1) - offset; + } else { + size_t pos = base + offset; + total += pack_pos_to_offset(bitmap_git->pack, pos + 1) - + pack_pos_to_offset(bitmap_git->pack, pos); + } } } @@ -1552,7 +1905,6 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) { struct bitmap *result = bitmap_git->result; - struct packed_git *pack = bitmap_git->pack; struct eindex *eindex = &bitmap_git->ext_index; off_t total = 0; struct object_info oi = OBJECT_INFO_INIT; @@ -1564,7 +1916,7 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) for (i = 0; i < eindex->count; i++) { struct 
object *obj = eindex->objects[i]; - if (!bitmap_get(result, pack->num_objects + i)) + if (!bitmap_get(result, bitmap_num_objects(bitmap_git) + i)) continue; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) @@ -1594,7 +1946,28 @@ off_t get_disk_usage_from_bitmap(struct bitmap_index *bitmap_git, return total; } +int bitmap_is_midx(struct bitmap_index *bitmap_git) +{ + return !!bitmap_git->midx; +} + const struct string_list *bitmap_preferred_tips(struct repository *r) { return repo_config_get_value_multi(r, "pack.preferbitmaptips"); } + +int bitmap_is_preferred_refname(struct repository *r, const char *refname) +{ + const struct string_list *preferred_tips = bitmap_preferred_tips(r); + struct string_list_item *item; + + if (!preferred_tips) + return 0; + + for_each_string_list_item(item, preferred_tips) { + if (starts_with(refname, item->string)) + return 1; + } + + return 0; +} diff --git a/pack-bitmap.h b/pack-bitmap.h index 99d733eb26..469090bad2 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -44,6 +44,7 @@ typedef int (*show_reachable_fn)( struct bitmap_index; struct bitmap_index *prepare_bitmap_git(struct repository *r); +struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx); void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags); void traverse_bitmap_commit_list(struct bitmap_index *, @@ -87,12 +88,17 @@ struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit); void bitmap_writer_select_commits(struct commit **indexed_commits, unsigned int indexed_commits_nr, int max_bitmaps); -void bitmap_writer_build(struct packing_data *to_pack); +int bitmap_writer_build(struct packing_data *to_pack); void bitmap_writer_finish(struct pack_idx_entry **index, uint32_t index_nr, const char *filename, uint16_t options); +char *midx_bitmap_filename(struct multi_pack_index *midx); +char *pack_bitmap_filename(struct packed_git *p); + 
+int bitmap_is_midx(struct bitmap_index *bitmap_git); const struct string_list *bitmap_preferred_tips(struct repository *r); +int bitmap_is_preferred_refname(struct repository *r, const char *refname); #endif diff --git a/packfile.c b/packfile.c index 4d0d625238..2aea4737b4 100644 --- a/packfile.c +++ b/packfile.c @@ -860,7 +860,7 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!strcmp(file_name, "multi-pack-index")) return; if (starts_with(file_name, "multi-pack-index") && - ends_with(file_name, ".rev")) + (ends_with(file_name, ".bitmap") || ends_with(file_name, ".rev"))) return; if (ends_with(file_name, ".idx") || ends_with(file_name, ".rev") || @@ -425,6 +425,10 @@ GIT_TEST_MULTI_PACK_INDEX=<boolean>, when true, forces the multi-pack- index to be written after every 'git repack' command, and overrides the 'core.multiPackIndex' setting to true. +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=<boolean>, when true, sets the +'--bitmap' option on all invocations of 'git multi-pack-index write', +and ignores pack-objects' '--write-bitmap-index'. 
+ GIT_TEST_SIDEBAND_ALL=<boolean>, when true, overrides the 'uploadpack.allowSidebandAll' setting to true, and when false, forces fetch-pack to not request sideband-all (even if the server advertises diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c index 7c2eb11a8e..cb0d27049a 100644 --- a/t/helper/test-read-midx.c +++ b/t/helper/test-read-midx.c @@ -60,12 +60,26 @@ static int read_midx_file(const char *object_dir, int show_objects) return 0; } +static int read_midx_checksum(const char *object_dir) +{ + struct multi_pack_index *m; + + setup_git_directory(); + m = load_multi_pack_index(object_dir, 1); + if (!m) + return 1; + printf("%s\n", hash_to_hex(get_midx_checksum(m))); + return 0; +} + int cmd__read_midx(int argc, const char **argv) { if (!(argc == 2 || argc == 3)) - usage("read-midx [--show-objects] <object-dir>"); + usage("read-midx [--show-objects|--checksum] <object-dir>"); if (!strcmp(argv[1], "--show-objects")) return read_midx_file(argv[2], 1); + else if (!strcmp(argv[1], "--checksum")) + return read_midx_checksum(argv[2]); return read_midx_file(argv[1], 0); } diff --git a/t/lib-bitmap.sh b/t/lib-bitmap.sh index fe3f98be24..21d0392dda 100644 --- a/t/lib-bitmap.sh +++ b/t/lib-bitmap.sh @@ -1,3 +1,6 @@ +# Helpers for scripts testing bitmap functionality; see t5310 for +# example usage. + # Compare a file containing rev-list bitmap traversal output to its non-bitmap # counterpart. You can't just use test_cmp for this, because the two produce # subtly different output: @@ -24,3 +27,240 @@ test_bitmap_traversal () { test_cmp "$1.normalized" "$2.normalized" && rm -f "$1.normalized" "$2.normalized" } + +# To ensure the logic for "maximal commits" is exercised, make +# the repository a bit more complicated. 
+# +# other second +# * * +# (99 commits) (99 commits) +# * * +# |\ /| +# | * octo-other octo-second * | +# |/|\_________ ____________/|\| +# | \ \/ __________/ | +# | | ________/\ / | +# * |/ * merge-right * +# | _|__________/ \____________ | +# |/ | \| +# (l1) * * merge-left * (r1) +# | / \________________________ | +# |/ \| +# (l2) * * (r2) +# \___________________________ | +# \| +# * (base) +# +# We only push bits down the first-parent history, which +# makes some of these commits unimportant! +# +# The important part for the maximal commit algorithm is how +# the bitmasks are extended. Assuming starting bit positions +# for second (bit 0) and other (bit 1), the bitmasks at the +# end should be: +# +# second: 1 (maximal, selected) +# other: 01 (maximal, selected) +# (base): 11 (maximal) +# +# This complicated history was important for a previous +# version of the walk that guarantees never walking a +# commit multiple times. That goal might be important +# again, so preserve this complicated case. For now, this +# test will guarantee that the bitmaps are computed +# correctly, even with the repeat calculations. 
+setup_bitmap_history() { + test_expect_success 'setup repo with moderate-sized history' ' + test_commit_bulk --id=file 10 && + git branch -M second && + git checkout -b other HEAD~5 && + test_commit_bulk --id=side 10 && + + # add complicated history setup, including merges and + # ambiguous merge-bases + + git checkout -b merge-left other~2 && + git merge second~2 -m "merge-left" && + + git checkout -b merge-right second~1 && + git merge other~1 -m "merge-right" && + + git checkout -b octo-second second && + git merge merge-left merge-right -m "octopus-second" && + + git checkout -b octo-other other && + git merge merge-left merge-right -m "octopus-other" && + + git checkout other && + git merge octo-other -m "pull octopus" && + + git checkout second && + git merge octo-second -m "pull octopus" && + + # Remove these branches so they are not selected + # as bitmap tips + git branch -D merge-left && + git branch -D merge-right && + git branch -D octo-other && + git branch -D octo-second && + + # add padding to make these merges less interesting + # and avoid having them selected for bitmaps + test_commit_bulk --id=file 100 && + git checkout other && + test_commit_bulk --id=side 100 && + git checkout second && + + bitmaptip=$(git rev-parse second) && + blob=$(echo tagged-blob | git hash-object -w --stdin) && + git tag tagged-blob $blob + ' +} + +rev_list_tests_head () { + test_expect_success "counting commits via bitmap ($state, $branch)" ' + git rev-list --count $branch >expect && + git rev-list --use-bitmap-index --count $branch >actual && + test_cmp expect actual + ' + + test_expect_success "counting partial commits via bitmap ($state, $branch)" ' + git rev-list --count $branch~5..$branch >expect && + git rev-list --use-bitmap-index --count $branch~5..$branch >actual && + test_cmp expect actual + ' + + test_expect_success "counting commits with limit ($state, $branch)" ' + git rev-list --count -n 1 $branch >expect && + git rev-list --use-bitmap-index --count -n 1 
$branch >actual && + test_cmp expect actual + ' + + test_expect_success "counting non-linear history ($state, $branch)" ' + git rev-list --count other...second >expect && + git rev-list --use-bitmap-index --count other...second >actual && + test_cmp expect actual + ' + + test_expect_success "counting commits with limiting ($state, $branch)" ' + git rev-list --count $branch -- 1.t >expect && + git rev-list --use-bitmap-index --count $branch -- 1.t >actual && + test_cmp expect actual + ' + + test_expect_success "counting objects via bitmap ($state, $branch)" ' + git rev-list --count --objects $branch >expect && + git rev-list --use-bitmap-index --count --objects $branch >actual && + test_cmp expect actual + ' + + test_expect_success "enumerate commits ($state, $branch)" ' + git rev-list --use-bitmap-index $branch >actual && + git rev-list $branch >expect && + test_bitmap_traversal --no-confirm-bitmaps expect actual + ' + + test_expect_success "enumerate --objects ($state, $branch)" ' + git rev-list --objects --use-bitmap-index $branch >actual && + git rev-list --objects $branch >expect && + test_bitmap_traversal expect actual + ' + + test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" ' + git rev-list --objects --use-bitmap-index $branch tagged-blob >actual && + grep $blob actual + ' +} + +rev_list_tests () { + state=$1 + + for branch in "second" "other" + do + rev_list_tests_head + done +} + +basic_bitmap_tests () { + tip="$1" + test_expect_success 'rev-list --test-bitmap verifies bitmaps' " + git rev-list --test-bitmap "${tip:-HEAD}" + " + + rev_list_tests 'full bitmap' + + test_expect_success 'clone from bitmapped repository' ' + rm -fr clone.git && + git clone --no-local --bare . 
clone.git && + git rev-parse HEAD >expect && + git --git-dir=clone.git rev-parse HEAD >actual && + test_cmp expect actual + ' + + test_expect_success 'partial clone from bitmapped repository' ' + test_config uploadpack.allowfilter true && + rm -fr partial-clone.git && + git clone --no-local --bare --filter=blob:none . partial-clone.git && + ( + cd partial-clone.git && + pack=$(echo objects/pack/*.pack) && + git verify-pack -v "$pack" >have && + awk "/blob/ { print \$1 }" <have >blobs && + # we expect this single blob because of the direct ref + git rev-parse refs/tags/tagged-blob >expect && + test_cmp expect blobs + ) + ' + + test_expect_success 'setup further non-bitmapped commits' ' + test_commit_bulk --id=further 10 + ' + + rev_list_tests 'partial bitmap' + + test_expect_success 'fetch (partial bitmap)' ' + git --git-dir=clone.git fetch origin second:second && + git rev-parse HEAD >expect && + git --git-dir=clone.git rev-parse HEAD >actual && + test_cmp expect actual + ' + + test_expect_success 'enumerating progress counts pack-reused objects' ' + count=$(git rev-list --objects --all --count) && + git repack -adb && + + # check first with only reused objects; confirm that our + # progress showed the right number, and also that we did + # pack-reuse as expected. Check only the final "done" + # line of the meter (there may be an arbitrary number of + # intermediate lines ending with CR). 
+ GIT_PROGRESS_DELAY=0 \ + git pack-objects --all --stdout --progress \ + </dev/null >/dev/null 2>stderr && + grep "Enumerating objects: $count, done" stderr && + grep "pack-reused $count" stderr && + + # now the same but with one non-reused object + git commit --allow-empty -m "an extra commit object" && + GIT_PROGRESS_DELAY=0 \ + git pack-objects --all --stdout --progress \ + </dev/null >/dev/null 2>stderr && + grep "Enumerating objects: $((count+1)), done" stderr && + grep "pack-reused $count" stderr + ' +} + +# have_delta <obj> <expected_base> +# +# Note that because this relies on cat-file, it might find _any_ copy of an +# object in the repository. The caller is responsible for making sure +# there's only one (e.g., via "repack -ad", or having just fetched a copy). +have_delta () { + echo $2 >expect && + echo $1 | git cat-file --batch-check="%(deltabase)" >actual && + test_cmp expect actual +} + +midx_checksum () { + test-tool read-midx --checksum "$1" +} diff --git a/t/perf/lib-bitmap.sh b/t/perf/lib-bitmap.sh new file mode 100644 index 0000000000..63d3bc7cec --- /dev/null +++ b/t/perf/lib-bitmap.sh @@ -0,0 +1,69 @@ +# Helper functions for testing bitmap performance; see p5310. 
+ +test_full_bitmap () { + test_perf 'simulated clone' ' + git pack-objects --stdout --all </dev/null >/dev/null + ' + + test_perf 'simulated fetch' ' + have=$(git rev-list HEAD~100 -1) && + { + echo HEAD && + echo ^$have + } | git pack-objects --revs --stdout >/dev/null + ' + + test_perf 'pack to file (bitmap)' ' + git pack-objects --use-bitmap-index --all pack1b </dev/null >/dev/null + ' + + test_perf 'rev-list (commits)' ' + git rev-list --all --use-bitmap-index >/dev/null + ' + + test_perf 'rev-list (objects)' ' + git rev-list --all --use-bitmap-index --objects >/dev/null + ' + + test_perf 'rev-list with tag negated via --not --all (objects)' ' + git rev-list perf-tag --not --all --use-bitmap-index --objects >/dev/null + ' + + test_perf 'rev-list with negative tag (objects)' ' + git rev-list HEAD --not perf-tag --use-bitmap-index --objects >/dev/null + ' + + test_perf 'rev-list count with blob:none' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=blob:none >/dev/null + ' + + test_perf 'rev-list count with blob:limit=1k' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=blob:limit=1k >/dev/null + ' + + test_perf 'rev-list count with tree:0' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=tree:0 >/dev/null + ' + + test_perf 'simulated partial clone' ' + git pack-objects --stdout --all --filter=blob:none </dev/null >/dev/null + ' +} + +test_partial_bitmap () { + test_perf 'clone (partial bitmap)' ' + git pack-objects --stdout --all </dev/null >/dev/null + ' + + test_perf 'pack to file (partial bitmap)' ' + git pack-objects --use-bitmap-index --all pack2b </dev/null >/dev/null + ' + + test_perf 'rev-list with tree filter (partial bitmap)' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=tree:0 >/dev/null + ' +} diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 452be01056..7ad4f237bc 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ 
b/t/perf/p5310-pack-bitmaps.sh @@ -2,6 +2,7 @@ test_description='Tests pack performance using bitmaps' . ./perf-lib.sh +. "${TEST_DIRECTORY}/perf/lib-bitmap.sh" test_perf_large_repo @@ -25,56 +26,7 @@ test_perf 'repack to disk' ' git repack -ad ' -test_perf 'simulated clone' ' - git pack-objects --stdout --all </dev/null >/dev/null -' - -test_perf 'simulated fetch' ' - have=$(git rev-list HEAD~100 -1) && - { - echo HEAD && - echo ^$have - } | git pack-objects --revs --stdout >/dev/null -' - -test_perf 'pack to file (bitmap)' ' - git pack-objects --use-bitmap-index --all pack1b </dev/null >/dev/null -' - -test_perf 'rev-list (commits)' ' - git rev-list --all --use-bitmap-index >/dev/null -' - -test_perf 'rev-list (objects)' ' - git rev-list --all --use-bitmap-index --objects >/dev/null -' - -test_perf 'rev-list with tag negated via --not --all (objects)' ' - git rev-list perf-tag --not --all --use-bitmap-index --objects >/dev/null -' - -test_perf 'rev-list with negative tag (objects)' ' - git rev-list HEAD --not perf-tag --use-bitmap-index --objects >/dev/null -' - -test_perf 'rev-list count with blob:none' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=blob:none >/dev/null -' - -test_perf 'rev-list count with blob:limit=1k' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=blob:limit=1k >/dev/null -' - -test_perf 'rev-list count with tree:0' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=tree:0 >/dev/null -' - -test_perf 'simulated partial clone' ' - git pack-objects --stdout --all --filter=blob:none </dev/null >/dev/null -' +test_full_bitmap test_expect_success 'create partial bitmap state' ' # pick a commit to represent the repo tip in the past @@ -97,17 +49,6 @@ test_expect_success 'create partial bitmap state' ' git update-ref HEAD $orig_tip ' -test_perf 'clone (partial bitmap)' ' - git pack-objects --stdout --all </dev/null >/dev/null -' - -test_perf 'pack to file (partial bitmap)' ' - 
git pack-objects --use-bitmap-index --all pack2b </dev/null >/dev/null -' - -test_perf 'rev-list with tree filter (partial bitmap)' ' - git rev-list --use-bitmap-index --count --objects --all \ - --filter=tree:0 >/dev/null -' +test_partial_bitmap test_done diff --git a/t/perf/p5326-multi-pack-bitmaps.sh b/t/perf/p5326-multi-pack-bitmaps.sh new file mode 100755 index 0000000000..5845109ac7 --- /dev/null +++ b/t/perf/p5326-multi-pack-bitmaps.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +test_description='Tests performance using midx bitmaps' +. ./perf-lib.sh +. "${TEST_DIRECTORY}/perf/lib-bitmap.sh" + +test_perf_large_repo + +test_expect_success 'enable multi-pack index' ' + git config core.multiPackIndex true +' + +test_perf 'setup multi-pack index' ' + git repack -ad && + git multi-pack-index write --bitmap +' + +test_full_bitmap + +test_expect_success 'create partial bitmap state' ' + # pick a commit to represent the repo tip in the past + cutoff=$(git rev-list HEAD~100 -1) && + orig_tip=$(git rev-parse HEAD) && + + # now pretend we have just one tip + rm -rf .git/logs .git/refs/* .git/packed-refs && + git update-ref HEAD $cutoff && + + # and then repack, which will leave us with a nice + # big bitmap pack of the "old" history, and all of + # the new history will be loose, as if it had been pushed + # up incrementally and exploded via unpack-objects + git repack -Ad && + git multi-pack-index write --bitmap && + + # and now restore our original tip, as if the pushes + # had happened + git update-ref HEAD $orig_tip +' + +test_partial_bitmap + +test_done diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index a211a66c67..bba679685f 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -4,6 +4,9 @@ test_description='partial clone' . 
./test-lib.sh +# missing promisor objects cause repacks which write bitmaps to fail +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + delete_object () { rm $1/.git/objects/$(echo $2 | sed -e 's|^..|&/|') } @@ -536,7 +539,13 @@ test_expect_success 'gc does not repack promisor objects if there are none' ' repack_and_check () { rm -rf repo2 && cp -r repo repo2 && - git -C repo2 repack $1 -d && + if test x"$1" = "x--must-fail" + then + shift + test_must_fail git -C repo2 repack $1 -d + else + git -C repo2 repack $1 -d + fi && git -C repo2 fsck && git -C repo2 cat-file -e $2 && @@ -561,6 +570,7 @@ test_expect_success 'repack -d does not irreversibly delete promisor objects' ' printf "$THREE\n" | pack_as_from_promisor && delete_object repo "$ONE" && + repack_and_check --must-fail -ab "$TWO" "$THREE" && repack_and_check -a "$TWO" "$THREE" && repack_and_check -A "$TWO" "$THREE" && repack_and_check -l "$TWO" "$THREE" diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index b02838750e..673baa5c3c 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -8,6 +8,10 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME . "$TEST_DIRECTORY"/lib-bundle.sh . "$TEST_DIRECTORY"/lib-bitmap.sh +# t5310 deals only with single-pack bitmaps, so don't write MIDX bitmaps in +# their place. +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + objpath () { echo ".git/objects/$(echo "$1" | sed -e 's|\(..\)|\1/|')" } @@ -25,93 +29,10 @@ has_any () { grep -Ff "$1" "$2" } -# To ensure the logic for "maximal commits" is exercised, make -# the repository a bit more complicated. 
-# -# other second -# * * -# (99 commits) (99 commits) -# * * -# |\ /| -# | * octo-other octo-second * | -# |/|\_________ ____________/|\| -# | \ \/ __________/ | -# | | ________/\ / | -# * |/ * merge-right * -# | _|__________/ \____________ | -# |/ | \| -# (l1) * * merge-left * (r1) -# | / \________________________ | -# |/ \| -# (l2) * * (r2) -# \___________________________ | -# \| -# * (base) -# -# We only push bits down the first-parent history, which -# makes some of these commits unimportant! -# -# The important part for the maximal commit algorithm is how -# the bitmasks are extended. Assuming starting bit positions -# for second (bit 0) and other (bit 1), the bitmasks at the -# end should be: -# -# second: 1 (maximal, selected) -# other: 01 (maximal, selected) -# (base): 11 (maximal) -# -# This complicated history was important for a previous -# version of the walk that guarantees never walking a -# commit multiple times. That goal might be important -# again, so preserve this complicated case. For now, this -# test will guarantee that the bitmaps are computed -# correctly, even with the repeat calculations. 
- -test_expect_success 'setup repo with moderate-sized history' ' - test_commit_bulk --id=file 10 && - git branch -M second && - git checkout -b other HEAD~5 && - test_commit_bulk --id=side 10 && - - # add complicated history setup, including merges and - # ambiguous merge-bases - - git checkout -b merge-left other~2 && - git merge second~2 -m "merge-left" && - - git checkout -b merge-right second~1 && - git merge other~1 -m "merge-right" && - - git checkout -b octo-second second && - git merge merge-left merge-right -m "octopus-second" && - - git checkout -b octo-other other && - git merge merge-left merge-right -m "octopus-other" && - - git checkout other && - git merge octo-other -m "pull octopus" && - - git checkout second && - git merge octo-second -m "pull octopus" && - - # Remove these branches so they are not selected - # as bitmap tips - git branch -D merge-left && - git branch -D merge-right && - git branch -D octo-other && - git branch -D octo-second && - - # add padding to make these merges less interesting - # and avoid having them selected for bitmaps - test_commit_bulk --id=file 100 && - git checkout other && - test_commit_bulk --id=side 100 && - git checkout second && - - bitmaptip=$(git rev-parse second) && - blob=$(echo tagged-blob | git hash-object -w --stdin) && - git tag tagged-blob $blob && - git config repack.writebitmaps true +setup_bitmap_history + +test_expect_success 'setup writing bitmaps during repack' ' + git config repack.writeBitmaps true ' test_expect_success 'full repack creates bitmaps' ' @@ -123,109 +44,7 @@ test_expect_success 'full repack creates bitmaps' ' grep "\"key\":\"num_maximal_commits\",\"value\":\"107\"" trace ' -test_expect_success 'rev-list --test-bitmap verifies bitmaps' ' - git rev-list --test-bitmap HEAD -' - -rev_list_tests_head () { - test_expect_success "counting commits via bitmap ($state, $branch)" ' - git rev-list --count $branch >expect && - git rev-list --use-bitmap-index --count $branch >actual && - 
test_cmp expect actual - ' - - test_expect_success "counting partial commits via bitmap ($state, $branch)" ' - git rev-list --count $branch~5..$branch >expect && - git rev-list --use-bitmap-index --count $branch~5..$branch >actual && - test_cmp expect actual - ' - - test_expect_success "counting commits with limit ($state, $branch)" ' - git rev-list --count -n 1 $branch >expect && - git rev-list --use-bitmap-index --count -n 1 $branch >actual && - test_cmp expect actual - ' - - test_expect_success "counting non-linear history ($state, $branch)" ' - git rev-list --count other...second >expect && - git rev-list --use-bitmap-index --count other...second >actual && - test_cmp expect actual - ' - - test_expect_success "counting commits with limiting ($state, $branch)" ' - git rev-list --count $branch -- 1.t >expect && - git rev-list --use-bitmap-index --count $branch -- 1.t >actual && - test_cmp expect actual - ' - - test_expect_success "counting objects via bitmap ($state, $branch)" ' - git rev-list --count --objects $branch >expect && - git rev-list --use-bitmap-index --count --objects $branch >actual && - test_cmp expect actual - ' - - test_expect_success "enumerate commits ($state, $branch)" ' - git rev-list --use-bitmap-index $branch >actual && - git rev-list $branch >expect && - test_bitmap_traversal --no-confirm-bitmaps expect actual - ' - - test_expect_success "enumerate --objects ($state, $branch)" ' - git rev-list --objects --use-bitmap-index $branch >actual && - git rev-list --objects $branch >expect && - test_bitmap_traversal expect actual - ' - - test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" ' - git rev-list --objects --use-bitmap-index $branch tagged-blob >actual && - grep $blob actual - ' -} - -rev_list_tests () { - state=$1 - - for branch in "second" "other" - do - rev_list_tests_head - done -} - -rev_list_tests 'full bitmap' - -test_expect_success 'clone from bitmapped repository' ' - git clone --no-local --bare . 
clone.git && - git rev-parse HEAD >expect && - git --git-dir=clone.git rev-parse HEAD >actual && - test_cmp expect actual -' - -test_expect_success 'partial clone from bitmapped repository' ' - test_config uploadpack.allowfilter true && - git clone --no-local --bare --filter=blob:none . partial-clone.git && - ( - cd partial-clone.git && - pack=$(echo objects/pack/*.pack) && - git verify-pack -v "$pack" >have && - awk "/blob/ { print \$1 }" <have >blobs && - # we expect this single blob because of the direct ref - git rev-parse refs/tags/tagged-blob >expect && - test_cmp expect blobs - ) -' - -test_expect_success 'setup further non-bitmapped commits' ' - test_commit_bulk --id=further 10 -' - -rev_list_tests 'partial bitmap' - -test_expect_success 'fetch (partial bitmap)' ' - git --git-dir=clone.git fetch origin second:second && - git rev-parse HEAD >expect && - git --git-dir=clone.git rev-parse HEAD >actual && - test_cmp expect actual -' +basic_bitmap_tests test_expect_success 'incremental repack fails when bitmaps are requested' ' test_commit more-1 && @@ -461,40 +280,6 @@ test_expect_success 'truncated bitmap fails gracefully (cache)' ' test_i18ngrep corrupted.bitmap.index stderr ' -test_expect_success 'enumerating progress counts pack-reused objects' ' - count=$(git rev-list --objects --all --count) && - git repack -adb && - - # check first with only reused objects; confirm that our progress - # showed the right number, and also that we did pack-reuse as expected. - # Check only the final "done" line of the meter (there may be an - # arbitrary number of intermediate lines ending with CR). 
- GIT_PROGRESS_DELAY=0 \ - git pack-objects --all --stdout --progress \ - </dev/null >/dev/null 2>stderr && - grep "Enumerating objects: $count, done" stderr && - grep "pack-reused $count" stderr && - - # now the same but with one non-reused object - git commit --allow-empty -m "an extra commit object" && - GIT_PROGRESS_DELAY=0 \ - git pack-objects --all --stdout --progress \ - </dev/null >/dev/null 2>stderr && - grep "Enumerating objects: $((count+1)), done" stderr && - grep "pack-reused $count" stderr -' - -# have_delta <obj> <expected_base> -# -# Note that because this relies on cat-file, it might find _any_ copy of an -# object in the repository. The caller is responsible for making sure -# there's only one (e.g., via "repack -ad", or having just fetched a copy). -have_delta () { - echo $2 >expect && - echo $1 | git cat-file --batch-check="%(deltabase)" >actual && - test_cmp expect actual -} - # Create a state of history with these properties: # # - refs that allow a client to fetch some new history, while sharing some old diff --git a/t/t5319-multi-pack-index.sh b/t/t5319-multi-pack-index.sh index 3d4d9f10c3..bb04f0f23b 100755 --- a/t/t5319-multi-pack-index.sh +++ b/t/t5319-multi-pack-index.sh @@ -201,6 +201,34 @@ test_expect_success 'write midx with twelve packs' ' compare_results_with_midx "twelve packs" +test_expect_success 'multi-pack-index *.rev cleanup with --object-dir' ' + git init repo && + git clone -s repo alternate && + + test_when_finished "rm -rf repo alternate" && + + ( + cd repo && + test_commit base && + git repack -d + ) && + + ours="alternate/.git/objects/pack/multi-pack-index-123.rev" && + theirs="repo/.git/objects/pack/multi-pack-index-abc.rev" && + touch "$ours" "$theirs" && + + ( + cd alternate && + git multi-pack-index --object-dir ../repo/.git/objects write + ) && + + # writing a midx in "repo" should not remove the .rev file in the + # alternate + test_path_is_file repo/.git/objects/pack/multi-pack-index && + test_path_is_file $ours 
&& + test_path_is_missing $theirs +' + test_expect_success 'warn on improper hash version' ' git init --object-format=sha1 sha1 && ( @@ -277,6 +305,23 @@ test_expect_success 'midx picks objects from preferred pack' ' ) ' +test_expect_success 'preferred packs must be non-empty' ' + test_when_finished rm -rf preferred.git && + git init preferred.git && + ( + cd preferred.git && + + test_commit base && + git repack -ad && + + empty="$(git pack-objects $objdir/pack/pack </dev/null)" && + + test_must_fail git multi-pack-index write \ + --preferred-pack=pack-$empty.pack 2>err && + grep "with no objects" err + ) +' + test_expect_success 'verify multi-pack-index success' ' git multi-pack-index verify --object-dir=$objdir ' @@ -487,7 +532,8 @@ test_expect_success 'repack preserves multi-pack-index when creating packs' ' compare_results_with_midx "after repack" test_expect_success 'multi-pack-index and pack-bitmap' ' - git -c repack.writeBitmaps=true repack -ad && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -c repack.writeBitmaps=true repack -ad && git multi-pack-index write && git rev-list --test-bitmap HEAD ' @@ -537,7 +583,15 @@ test_expect_success 'force some 64-bit offsets with pack-objects' ' idx64=objects64/pack/test-64-$pack64.idx && chmod u+w $idx64 && corrupt_data $idx64 $(test_oid idxoff) "\02" && - midx64=$(git multi-pack-index --object-dir=objects64 write) && + # objects64 is not a real repository, but can serve as an alternate + # anyway so we can write a MIDX into it + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + ( cd ../objects64 && pwd ) >.git/objects/info/alternates && + midx64=$(git multi-pack-index --object-dir=../objects64 write) + ) && midx_read_expect 1 63 5 objects64 " large-offsets" ' @@ -842,4 +896,9 @@ test_expect_success 'usage shown without sub-command' ' ! 
test_i18ngrep "unrecognized subcommand" err ' +test_expect_success 'complains when run outside of a repository' ' + nongit test_must_fail git multi-pack-index write 2>err && + grep "not a git repository" err +' + test_done diff --git a/t/t5326-multi-pack-bitmaps.sh b/t/t5326-multi-pack-bitmaps.sh new file mode 100755 index 0000000000..4ad7c2c969 --- /dev/null +++ b/t/t5326-multi-pack-bitmaps.sh @@ -0,0 +1,286 @@ +#!/bin/sh + +test_description='exercise basic multi-pack bitmap functionality' +. ./test-lib.sh +. "${TEST_DIRECTORY}/lib-bitmap.sh" + +# We'll be writing our own midx and bitmaps, so avoid getting confused by the +# automatic ones. +GIT_TEST_MULTI_PACK_INDEX=0 +GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 + +objdir=.git/objects +midx=$objdir/pack/multi-pack-index + +# midx_pack_source <obj> +midx_pack_source () { + test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2 +} + +setup_bitmap_history + +test_expect_success 'enable core.multiPackIndex' ' + git config core.multiPackIndex true +' + +test_expect_success 'create single-pack midx with bitmaps' ' + git repack -ad && + git multi-pack-index write --bitmap && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_file $midx-$(midx_checksum $objdir).rev +' + +basic_bitmap_tests + +test_expect_success 'create new additional packs' ' + for i in $(test_seq 1 16) + do + test_commit "$i" && + git repack -d || return 1 + done && + + git checkout -b other2 HEAD~8 && + for i in $(test_seq 1 8) + do + test_commit "side-$i" && + git repack -d || return 1 + done && + git checkout second +' + +test_expect_success 'create multi-pack midx with bitmaps' ' + git multi-pack-index write --bitmap && + + ls $objdir/pack/pack-*.pack >packs && + test_line_count = 25 packs && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_file $midx-$(midx_checksum $objdir).rev +' + +basic_bitmap_tests + 
+test_expect_success '--no-bitmap is respected when bitmaps exist' ' + git multi-pack-index write --bitmap && + + test_commit respect--no-bitmap && + git repack -d && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_file $midx-$(midx_checksum $objdir).rev && + + git multi-pack-index write --no-bitmap && + + test_path_is_file $midx && + test_path_is_missing $midx-$(midx_checksum $objdir).bitmap && + test_path_is_missing $midx-$(midx_checksum $objdir).rev +' + +test_expect_success 'setup midx with base from later pack' ' + # Write a and b so that "a" is a delta on top of base "b", since Git + # prefers to delete contents out of a base rather than add to a shorter + # object. + test_seq 1 128 >a && + test_seq 1 130 >b && + + git add a b && + git commit -m "initial commit" && + + a=$(git rev-parse HEAD:a) && + b=$(git rev-parse HEAD:b) && + + # In the first pack, "a" is stored as a delta to "b". + p1=$(git pack-objects .git/objects/pack/pack <<-EOF + $a + $b + EOF + ) && + + # In the second pack, "a" is missing, and "b" is not a delta nor base to + # any other object. + p2=$(git pack-objects .git/objects/pack/pack <<-EOF + $b + $(git rev-parse HEAD) + $(git rev-parse HEAD^{tree}) + EOF + ) && + + git prune-packed && + # Use the second pack as the preferred source, so that "b" occurs + # earlier in the MIDX object order, rendering "a" unusable for pack + # reuse. + git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx && + + have_delta $a $b && + test $(midx_pack_source $a) != $(midx_pack_source $b) +' + +rev_list_tests 'full bitmap with backwards delta' + +test_expect_success 'clone with bitmaps enabled' ' + git clone --no-local --bare . 
clone-reverse-delta.git && + test_when_finished "rm -fr clone-reverse-delta.git" && + + git rev-parse HEAD >expect && + git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual && + test_cmp expect actual +' + +bitmap_reuse_tests() { + from=$1 + to=$2 + + test_expect_success "setup pack reuse tests ($from -> $to)" ' + rm -fr repo && + git init repo && + ( + cd repo && + test_commit_bulk 16 && + git tag old-tip && + + git config core.multiPackIndex true && + if test "MIDX" = "$from" + then + git repack -Ad && + git multi-pack-index write --bitmap + else + git repack -Adb + fi + ) + ' + + test_expect_success "build bitmap from existing ($from -> $to)" ' + ( + cd repo && + test_commit_bulk --id=further 16 && + git tag new-tip && + + if test "MIDX" = "$to" + then + git repack -d && + git multi-pack-index write --bitmap + else + git repack -Adb + fi + ) + ' + + test_expect_success "verify resulting bitmaps ($from -> $to)" ' + ( + cd repo && + git for-each-ref && + git rev-list --test-bitmap refs/tags/old-tip && + git rev-list --test-bitmap refs/tags/new-tip + ) + ' +} + +bitmap_reuse_tests 'pack' 'MIDX' +bitmap_reuse_tests 'MIDX' 'pack' +bitmap_reuse_tests 'MIDX' 'MIDX' + +test_expect_success 'missing object closure fails gracefully' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit loose && + test_commit packed && + + # Do not pass "--revs"; we want a pack without the "loose" + # commit. 
+ git pack-objects $objdir/pack/pack <<-EOF && + $(git rev-parse packed) + EOF + + test_must_fail git multi-pack-index write --bitmap 2>err && + grep "doesn.t have full closure" err && + test_path_is_missing $midx + ) +' + +test_expect_success 'setup partial bitmaps' ' + test_commit packed && + git repack && + test_commit loose && + git multi-pack-index write --bitmap 2>err && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_file $midx-$(midx_checksum $objdir).rev +' + +basic_bitmap_tests HEAD~ + +test_expect_success 'removing a MIDX clears stale bitmaps' ' + rm -fr repo && + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + test_commit base && + git repack && + git multi-pack-index write --bitmap && + + # Write a MIDX and bitmap; remove the MIDX but leave the bitmap. + stale_bitmap=$midx-$(midx_checksum $objdir).bitmap && + stale_rev=$midx-$(midx_checksum $objdir).rev && + rm $midx && + + # Then write a new MIDX. 
+ test_commit new && + git repack && + git multi-pack-index write --bitmap && + + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_file $midx-$(midx_checksum $objdir).rev && + test_path_is_missing $stale_bitmap && + test_path_is_missing $stale_rev + ) +' + +test_expect_success 'pack.preferBitmapTips' ' + git init repo && + test_when_finished "rm -fr repo" && + ( + cd repo && + + test_commit_bulk --message="%s" 103 && + + git log --format="%H" >commits.raw && + sort <commits.raw >commits && + + git log --format="create refs/tags/%s %H" HEAD >refs && + git update-ref --stdin <refs && + + git multi-pack-index write --bitmap && + test_path_is_file $midx && + test_path_is_file $midx-$(midx_checksum $objdir).bitmap && + test_path_is_file $midx-$(midx_checksum $objdir).rev && + + test-tool bitmap list-commits | sort >bitmaps && + comm -13 bitmaps commits >before && + test_line_count = 1 before && + + perl -ne "printf(\"create refs/tags/include/%d \", $.); print" \ + <before | git update-ref --stdin && + + rm -fr $midx-$(midx_checksum $objdir).bitmap && + rm -fr $midx-$(midx_checksum $objdir).rev && + rm -fr $midx && + + git -c pack.preferBitmapTips=refs/tags/include \ + multi-pack-index write --bitmap && + test-tool bitmap list-commits | sort >bitmaps && + comm -13 bitmaps commits >after && + + ! 
test_cmp before after + ) +' + +test_done diff --git a/t/t7700-repack.sh b/t/t7700-repack.sh index 25b235c063..98eda3bfeb 100755 --- a/t/t7700-repack.sh +++ b/t/t7700-repack.sh @@ -63,13 +63,14 @@ test_expect_success 'objects in packs marked .keep are not repacked' ' test_expect_success 'writing bitmaps via command-line can duplicate .keep objects' ' # build on $oid, $packid, and .keep state from previous - git repack -Adbl && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 git repack -Adbl && test_has_duplicate_object true ' test_expect_success 'writing bitmaps via config can duplicate .keep objects' ' # build on $oid, $packid, and .keep state from previous - git -c repack.writebitmaps=true repack -Adl && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -c repack.writebitmaps=true repack -Adl && test_has_duplicate_object true ' @@ -189,7 +190,9 @@ test_expect_success 'repack --keep-pack' ' test_expect_success 'bitmaps are created by default in bare repos' ' git clone --bare .git bare.git && - git -C bare.git repack -ad && + rm -f bare.git/objects/pack/*.bitmap && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -C bare.git repack -ad && bitmap=$(ls bare.git/objects/pack/*.bitmap) && test_path_is_file "$bitmap" ' @@ -200,7 +203,8 @@ test_expect_success 'incremental repack does not complain' ' ' test_expect_success 'bitmaps can be disabled on bare repos' ' - git -c repack.writeBitmaps=false -C bare.git repack -ad && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -c repack.writeBitmaps=false -C bare.git repack -ad && bitmap=$(ls bare.git/objects/pack/*.bitmap || :) && test -z "$bitmap" ' @@ -211,7 +215,8 @@ test_expect_success 'no bitmaps created if .keep files present' ' keep=${pack%.pack}.keep && test_when_finished "rm -f \"\$keep\"" && >"$keep" && - git -C bare.git repack -ad 2>stderr && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -C bare.git repack -ad 2>stderr && test_must_be_empty stderr && find bare.git/objects/pack/ -type f -name "*.bitmap" >actual 
&& test_must_be_empty actual @@ -222,7 +227,8 @@ test_expect_success 'auto-bitmaps do not complain if unavailable' ' blob=$(test-tool genrandom big $((1024*1024)) | git -C bare.git hash-object -w --stdin) && git -C bare.git update-ref refs/tags/big $blob && - git -C bare.git repack -ad 2>stderr && + GIT_TEST_MULTI_PACK_INDEX_WRITE_BITMAP=0 \ + git -C bare.git repack -ad 2>stderr && test_must_be_empty stderr && find bare.git/objects/pack -type f -name "*.bitmap" >actual && test_must_be_empty actual |