From 551cf8b655fa73c90dabea633d41b5f10deffaf2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 12 Feb 2020 21:16:15 -0500 Subject: pack-bitmap: factor out type iterator initialization When count_object_type() wants to iterate over the bitmap of all objects of a certain type, we have to pair up OBJ_COMMIT with bitmap->commits, and so forth. Since we're about to add more code to iterate over these bitmaps, let's pull the initialization into its own function. We can also use this to simplify traverse_bitmap_commit_list(). It accomplishes the same thing by manually passing the object type and the bitmap to show_objects_for_type(), but using our helper we just need the object type. Note there's one small code change here: previously we'd simply return zero when counting an unknown object type, and now we'll BUG(). This shouldn't matter in practice, as all of the callers pass in only usual commit/tree/blob/tag types. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap.c | 63 +++++++++++++++++++++++++++++++---------------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index e07c798879..9ca356ee29 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -616,9 +616,35 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, } } +static void init_type_iterator(struct ewah_iterator *it, + struct bitmap_index *bitmap_git, + enum object_type type) +{ + switch (type) { + case OBJ_COMMIT: + ewah_iterator_init(it, bitmap_git->commits); + break; + + case OBJ_TREE: + ewah_iterator_init(it, bitmap_git->trees); + break; + + case OBJ_BLOB: + ewah_iterator_init(it, bitmap_git->blobs); + break; + + case OBJ_TAG: + ewah_iterator_init(it, bitmap_git->tags); + break; + + default: + BUG("object type %d not stored by bitmap type index", type); + break; + } +} + static void show_objects_for_type( struct bitmap_index *bitmap_git, - struct ewah_bitmap *type_filter, enum object_type object_type, show_reachable_fn show_reach) { @@ -633,7 +659,7 @@ static void show_objects_for_type( if (bitmap_git->reuse_objects == bitmap_git->pack->num_objects) return; - ewah_iterator_init(&it, type_filter); + init_type_iterator(&it, bitmap_git, object_type); while (i < objects->word_alloc && ewah_iterator_next(&filter, &it)) { eword_t word = objects->words[i] & filter; @@ -835,14 +861,10 @@ void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, { assert(bitmap_git->result); - show_objects_for_type(bitmap_git, bitmap_git->commits, - OBJ_COMMIT, show_reachable); - show_objects_for_type(bitmap_git, bitmap_git->trees, - OBJ_TREE, show_reachable); - show_objects_for_type(bitmap_git, bitmap_git->blobs, - OBJ_BLOB, show_reachable); - show_objects_for_type(bitmap_git, bitmap_git->tags, - OBJ_TAG, show_reachable); + show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable); + show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable); + show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable); + show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable); show_extended_objects(bitmap_git, show_reachable); } @@ -857,26 +879,7 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git, struct ewah_iterator it; eword_t filter; - switch (type) { - case OBJ_COMMIT: - ewah_iterator_init(&it, bitmap_git->commits); - break; - - case OBJ_TREE: - ewah_iterator_init(&it, bitmap_git->trees); - break; - - case OBJ_BLOB: - ewah_iterator_init(&it, bitmap_git->blobs); - break; - - case OBJ_TAG: - ewah_iterator_init(&it, bitmap_git->tags); - break; - - default: - return 0; - } + init_type_iterator(&it, bitmap_git, type); while (i < objects->word_alloc && ewah_iterator_next(&filter, &it)) { eword_t word = objects->words[i++] & filter; -- cgit v1.2.3 From acac50dd8c2c9725841b3e9143d78c6345dc076c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 12 Feb 2020 21:16:33 -0500 Subject: pack-bitmap: fix leak of haves/wants object lists When we do a bitmap-aware revision traversal, we create an object_list for each of the "haves" and "wants" tips. After creating the result bitmaps these are no longer needed or used, but we never free the list memory. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- object.c | 9 +++++++++ object.h | 2 ++ pack-bitmap.c | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/object.c b/object.c index 142ef69399..4d11949b38 100644 --- a/object.c +++ b/object.c @@ -307,6 +307,15 @@ int object_list_contains(struct object_list *list, struct object *obj) return 0; } +void object_list_free(struct object_list **list) +{ + while (*list) { + struct object_list *p = *list; + *list = p->next; + free(p); + } +} + /* * A zero-length string to which object_array_entry::name can be * initialized without requiring a malloc/free. diff --git a/object.h b/object.h index 25f5ab3d54..2dbabfca0a 100644 --- a/object.h +++ b/object.h @@ -151,6 +151,8 @@ struct object_list *object_list_insert(struct object *item, int object_list_contains(struct object_list *list, struct object *obj); +void object_list_free(struct object_list **list); + /* Object array handling .. */ void add_object_array(struct object *obj, const char *name, struct object_array *array); void add_object_array_with_path(struct object *obj, const char *name, struct object_array *array, unsigned mode, const char *path); diff --git a/pack-bitmap.c b/pack-bitmap.c index 9ca356ee29..6c06099dc7 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -787,10 +787,15 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs) bitmap_git->result = wants_bitmap; bitmap_git->haves = haves_bitmap; + object_list_free(&wants); + object_list_free(&haves); + return bitmap_git; cleanup: free_bitmap_index(bitmap_git); + object_list_free(&wants); + object_list_free(&haves); return NULL; } -- cgit v1.2.3 From e03f928e2ab92e47467d48520a73a19582dff286 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 12 Feb 2020 21:17:30 -0500 Subject: rev-list: fallback to non-bitmap traversal when filtering The "--use-bitmap-index" option is usually aspirational: if we have bitmaps and the request can be fulfilled more quickly using them we'll do so, but otherwise fall back to a non-bitmap traversal. The exception is object filtering, which explicitly dies if the two options are combined. Let's convert this to the usual fallback behavior. This is a minor convenience for now (since the caller can easily know that --filter and --use-bitmap-index don't combine), but will become much more useful as we start to support _some_ filters with bitmaps, but not others. The test infrastructure here is bigger than necessary for checking this one small feature. But it will serve as the basis for more filtering bitmap tests in future patches. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 4 ++-- t/t6113-rev-list-bitmap-filters.sh | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100755 t/t6113-rev-list-bitmap-filters.sh diff --git a/builtin/rev-list.c b/builtin/rev-list.c index e28d62ec64..bce406bd1e 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -521,8 +521,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (revs.show_notes) die(_("rev-list does not support display of notes")); - if (filter_options.choice && use_bitmap_index) - die(_("cannot combine --use-bitmap-index with object filtering")); + if (filter_options.choice) + use_bitmap_index = 0; save_commit_buffer = (revs.verbose_header || revs.grep_filter.pattern_list || diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh new file mode 100755 index 0000000000..977f8d0930 --- /dev/null +++ b/t/t6113-rev-list-bitmap-filters.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='rev-list combining bitmaps and filters' +. ./test-lib.sh + +test_expect_success 'set up bitmapped repo' ' + # one commit will have bitmaps, the other will not + test_commit one && + git repack -adb && + test_commit two +' + +test_expect_success 'filters fallback to non-bitmap traversal' ' + # use a path-based filter, since they are inherently incompatible with + # bitmaps (i.e., this test will never get confused by later code to + # combine the features) + filter=$(echo "!one" | git hash-object -w --stdin) && + git rev-list --objects --filter=sparse:oid=$filter HEAD >expect && + git rev-list --use-bitmap-index \ + --objects --filter=sparse:oid=$filter HEAD >actual && + test_cmp expect actual +' + +test_done -- cgit v1.2.3 From d90fe06ea7dd15bdbd555ad2f4bfdd069032b697 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:16 -0500 Subject: pack-bitmap: refuse to do a bitmap traversal with pathspecs rev-list has refused to use bitmaps with pathspec limiting since c8a70d3509 (rev-list: disable --use-bitmap-index when pruning commits, 2015-07-01). But this is true not just for rev-list, but for anyone who calls prepare_bitmap_walk(); the code isn't equipped to handle this case. We never noticed because the only other callers would never pass a pathspec limiter. But let's push the check down into prepare_bitmap_walk() anyway. That's a more logical place for it to live, as callers shouldn't need to know the details (and must be prepared to fall back to a regular traversal anyway, since there might not be bitmaps in the repository). It would also prepare us for a day where this case _is_ handled, but that's pretty unlikely. E.g., we could use bitmaps to generate the set of commits, and then diff each commit to see if it matches the pathspec. That would be slightly faster than a naive traversal that actually walks the commits. But you'd probably do better still to make use of the newer commit-graph feature to make walking the commits very cheap. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 2 +- pack-bitmap.c | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index bce406bd1e..4cb5a52dee 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -533,7 +533,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (show_progress) progress = start_delayed_progress(show_progress, 0); - if (use_bitmap_index && !revs.prune) { + if (use_bitmap_index) { if (revs.count && !revs.left_right && !revs.cherry_mark) { uint32_t commit_count; int max_count = revs.max_count; diff --git a/pack-bitmap.c b/pack-bitmap.c index 6c06099dc7..a97b717e55 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -715,9 +715,19 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs) struct bitmap *wants_bitmap = NULL; struct bitmap *haves_bitmap = NULL; - struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); + struct bitmap_index *bitmap_git; + + /* + * We can't do pathspec limiting with bitmaps, because we don't know + * which commits are associated with which object changes (let alone + * even which objects are associated with which paths). + */ + if (revs->prune) + return NULL; + /* try to open a bitmapped pack, but don't parse it yet * because we may not need to use it */ + bitmap_git = xcalloc(1, sizeof(*bitmap_git)); if (open_pack_bitmap(revs->repo, bitmap_git) < 0) goto cleanup; -- cgit v1.2.3 From 792f8119986bd354eb4ea0858dfd45904120c257 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:18 -0500 Subject: rev-list: factor out bitmap-optimized routines There are a few operations in rev-list that are optimized for bitmaps. Rather than having the code inline in cmd_rev_list(), let's move them into helpers. This not only makes the flow of the main function simpler, but it lets us replace the complex "can we do the optimization?" conditionals with a series of early returns from the functions. That also makes it easy to add comments explaining those conditions. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 88 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 4cb5a52dee..38c5ca5603 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -364,6 +364,69 @@ static inline int parse_missing_action_value(const char *value) return 0; } +static int try_bitmap_count(struct rev_info *revs) +{ + uint32_t commit_count; + int max_count; + struct bitmap_index *bitmap_git; + + /* This function only handles counting, not general traversal. */ + if (!revs->count) + return -1; + + /* + * A bitmap result can't know left/right, etc, because we don't + * actually traverse. + */ + if (revs->left_right || revs->cherry_mark) + return -1; + + /* + * This must be saved before doing any walking, since the revision + * machinery will count it down to zero while traversing. + */ + max_count = revs->max_count; + + bitmap_git = prepare_bitmap_walk(revs); + if (!bitmap_git) + return -1; + + count_bitmap_commit_list(bitmap_git, &commit_count, NULL, NULL, NULL); + if (max_count >= 0 && max_count < commit_count) + commit_count = max_count; + + printf("%d\n", commit_count); + free_bitmap_index(bitmap_git); + return 0; +} + +static int try_bitmap_traversal(struct rev_info *revs) +{ + struct bitmap_index *bitmap_git; + + /* + * We can't use a bitmap result with a traversal limit, since the set + * of commits we'd get would be essentially random. + */ + if (revs->max_count >= 0) + return -1; + + /* + * Our bitmap result will return all objects, and we're not + * yet prepared to show only particular types. + */ + if (!revs->tag_objects || !revs->tree_objects || !revs->blob_objects) + return -1; + + bitmap_git = prepare_bitmap_walk(revs); + if (!bitmap_git) + return -1; + + traverse_bitmap_commit_list(bitmap_git, &show_object_fast); + free_bitmap_index(bitmap_git); + return 0; +} + int cmd_rev_list(int argc, const char **argv, const char *prefix) { struct rev_info revs; @@ -534,27 +597,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) progress = start_delayed_progress(show_progress, 0); if (use_bitmap_index) { - if (revs.count && !revs.left_right && !revs.cherry_mark) { - uint32_t commit_count; - int max_count = revs.max_count; - struct bitmap_index *bitmap_git; - if ((bitmap_git = prepare_bitmap_walk(&revs))) { - count_bitmap_commit_list(bitmap_git, &commit_count, NULL, NULL, NULL); - if (max_count >= 0 && max_count < commit_count) - commit_count = max_count; - printf("%d\n", commit_count); - free_bitmap_index(bitmap_git); - return 0; - } - } else if (revs.max_count < 0 && - revs.tag_objects && revs.tree_objects && revs.blob_objects) { - struct bitmap_index *bitmap_git; - if ((bitmap_git = prepare_bitmap_walk(&revs))) { - traverse_bitmap_commit_list(bitmap_git, &show_object_fast); - free_bitmap_index(bitmap_git); - return 0; - } - } + if (!try_bitmap_count(&revs)) + return 0; + if (!try_bitmap_traversal(&revs)) + return 0; } if (prepare_revision_walk(&revs)) -- cgit v1.2.3 From 55cb10f9b5cd7216652a5879b792bcd7ac173035 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:20 -0500 Subject: rev-list: make --count work with --objects The current behavior from "rev-list --count --objects" is nonsensical: we enumerate all of the objects except commits, but then give a count of commits. This wasn't planned, and is just what the code happens to do. Instead, let's give the answer the user almost certainly wanted: the full count of objects. Note that there are more complicated cases around cherry-marking, etc. We'll punt on those for now, but let the user know that we can't produce an answer (rather than giving them something useless). We'll test both the new feature as well as a vanilla --count of commits, since that surprisingly doesn't seem to be covered in the existing tests. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 13 +++++++++++++ t/t6000-rev-list-misc.sh | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 38c5ca5603..9452123988 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -253,11 +253,19 @@ static int finish_object(struct object *obj, const char *name, void *cb_data) static void show_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; + struct rev_info *revs = info->revs; + if (finish_object(obj, name, cb_data)) return; display_progress(progress, ++progress_counter); if (info->flags & REV_LIST_QUIET) return; + + if (revs->count) { + revs->count_right++; + return; + } + if (arg_show_object_names) show_object_with_name(stdout, obj, name); else @@ -584,6 +592,11 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (revs.show_notes) die(_("rev-list does not support display of notes")); + if (revs.count && + (revs.tag_objects || revs.tree_objects || revs.blob_objects) && + (revs.left_right || revs.cherry_mark)) + die(_("marked counting is incompatible with --objects")); + if (filter_options.choice) use_bitmap_index = 0; diff --git a/t/t6000-rev-list-misc.sh b/t/t6000-rev-list-misc.sh index b8cf82349b..383f2c457d 100755 --- a/t/t6000-rev-list-misc.sh +++ b/t/t6000-rev-list-misc.sh @@ -148,4 +148,16 @@ test_expect_success 'rev-list --end-of-options' ' test_cmp expect actual ' +test_expect_success 'rev-list --count' ' + count=$(git rev-list --count HEAD) && + git rev-list HEAD >actual && + test_line_count = $count actual +' + +test_expect_success 'rev-list --count --objects' ' + count=$(git rev-list --count --objects HEAD) && + git rev-list --objects HEAD >actual && + test_line_count = $count actual +' + test_done -- cgit v1.2.3 From 608d9c93658d6ec2d585748ed45218cc1f404640 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:22 -0500 Subject: rev-list: allow bitmaps when counting objects The prior commit taught "--count --objects" to work without bitmaps. We should be able to get the same answer much more quickly with bitmaps. Note that we punt on the max_count case here. This perhaps _could_ be made to work if we find all of the boundary commits and treat them as UNINTERESTING, subtracting them (and their reachable objects) from the set we return. That implies an actual commit traversal, but we'd still be faster due to avoiding opening up any trees. Given the complexity and the fact that anyone is unlikely to want this, it makes sense to just fall back to the non-bitmap case for now. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 21 ++++++++++++++++++--- t/t5310-pack-bitmaps.sh | 6 ++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 9452123988..70f3207ecc 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -374,7 +374,10 @@ static inline int parse_missing_action_value(const char *value) static int try_bitmap_count(struct rev_info *revs) { - uint32_t commit_count; + uint32_t commit_count = 0, + tag_count = 0, + tree_count = 0, + blob_count = 0; int max_count; struct bitmap_index *bitmap_git; @@ -389,6 +392,15 @@ static int try_bitmap_count(struct rev_info *revs) if (revs->left_right || revs->cherry_mark) return -1; + /* + * If we're counting reachable objects, we can't handle a max count of + * commits to traverse, since we don't know which objects go with which + * commit. + */ + if (revs->max_count >= 0 && + (revs->tag_objects || revs->tree_objects || revs->blob_objects)) + return -1; + /* * This must be saved before doing any walking, since the revision * machinery will count it down to zero while traversing. @@ -399,11 +411,14 @@ static int try_bitmap_count(struct rev_info *revs) if (!bitmap_git) return -1; - count_bitmap_commit_list(bitmap_git, &commit_count, NULL, NULL, NULL); + count_bitmap_commit_list(bitmap_git, &commit_count, + revs->tree_objects ? &tree_count : NULL, + revs->blob_objects ? &blob_count : NULL, + revs->tag_objects ? &tag_count : NULL); if (max_count >= 0 && max_count < commit_count) commit_count = max_count; - printf("%d\n", commit_count); + printf("%d\n", commit_count + tree_count + blob_count + tag_count); free_bitmap_index(bitmap_git); return 0; } diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 6640329ebf..7ba7d294a5 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -74,6 +74,12 @@ rev_list_tests() { test_cmp expect actual ' + test_expect_success "counting objects via bitmap ($state)" ' + git rev-list --count --objects HEAD >expect && + git rev-list --use-bitmap-index --count --objects HEAD >actual && + test_cmp expect actual + ' + test_expect_success "enumerate --objects ($state)" ' git rev-list --objects --use-bitmap-index HEAD >tmp && cut -d" " -f1 tmp2 && -- cgit v1.2.3 From ea047a8eb4f45c1c4e3c9b1af919bf0361ac1b7c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:25 -0500 Subject: t5310: factor out bitmap traversal comparison We check the results of "rev-list --use-bitmap-index" by comparing it to the same traversal without the bitmap option. However, this is a little tricky to do because of some output differences (see the included comment for details). Let's pull this out into a helper function, since we'll be adding some similar tests. While we're at it, let's also try to confirm that the bitmap output did indeed use bitmaps. Since the code internally falls back to the non-bitmap path in some cases, the tests are at risk of becoming trivial noops. This is a bit fragile, as not all outputs will differ (e.g., looking at only the commits from a fully-bitmapped pack will end up exactly the same as the normal traversal order, since it also matches the pack order). So we'll provide an escape hatch by which tests can disable this check (which should only be used after manually confirming that bitmaps kicked in). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5310-pack-bitmaps.sh | 10 +++------- t/test-lib-functions.sh | 27 +++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 7ba7d294a5..b8645ae070 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -81,13 +81,9 @@ rev_list_tests() { ' test_expect_success "enumerate --objects ($state)" ' - git rev-list --objects --use-bitmap-index HEAD >tmp && - cut -d" " -f1 tmp2 && - sort actual && - git rev-list --objects HEAD >tmp && - cut -d" " -f1 tmp2 && - sort expect && - test_cmp expect actual + git rev-list --objects --use-bitmap-index HEAD >actual && + git rev-list --objects HEAD >expect && + test_bitmap_traversal expect actual ' test_expect_success "bitmap --objects handles non-commit objects ($state)" ' diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh index 284c52d076..352c213d52 100644 --- a/t/test-lib-functions.sh +++ b/t/test-lib-functions.sh @@ -1516,3 +1516,30 @@ test_set_port () { port=$(($port + ${GIT_TEST_STRESS_JOB_NR:-0})) eval $var=$port } + +# Compare a file containing rev-list bitmap traversal output to its non-bitmap +# counterpart. You can't just use test_cmp for this, because the two produce +# subtly different output: +# +# - regular output is in traversal order, whereas bitmap is split by type, +# with non-packed objects at the end +# +# - regular output has a space and the pathname appended to non-commit +# objects; bitmap output omits this +# +# This function normalizes and compares the two. The second file should +# always be the bitmap output. +test_bitmap_traversal () { + if test "$1" = "--no-confirm-bitmaps" + then + shift + elif cmp "$1" "$2" + then + echo >&2 "identical raw outputs; are you sure bitmaps were used?" + return 1 + fi && + cut -d' ' -f1 "$1" | sort >"$1.normalized" && + sort "$2" >"$2.normalized" && + test_cmp "$1.normalized" "$2.normalized" && + rm -f "$1.normalized" "$2.normalized" +} -- cgit v1.2.3 From 4eb707ebd681eb85306071db33ed70186d1642ac Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:27 -0500 Subject: rev-list: allow commit-only bitmap traversals Ever since we added reachability bitmap support, we've been able to use it with rev-list to get the full list of objects, like: git rev-list --objects --use-bitmap-index --all But you can't do so without --objects, since we weren't ready to just show the commits. However, the internals of the bitmap code are mostly ready for this: they avoid opening up trees when walking to fill in the bitmaps. We just need to actually pass in the rev_info to traverse_bitmap_commit_list() so it knows which types to bother triggering our callback for. For completeness, the perf test now covers both the existing --objects case, as well as the new commits-only behavior (the objects one got way faster when we introduced bitmaps, but obviously isn't improved now). Here are numbers for linux.git: Test HEAD^ HEAD ------------------------------------------------------------------------ 5310.7: rev-list (commits) 8.29(8.10+0.19) 1.76(1.72+0.04) -78.8% 5310.8: rev-list (objects) 8.06(7.94+0.12) 8.14(7.94+0.13) +1.0% That run was cheating a little, as I didn't have any commit-graph in the repository, and we'd built it by default these days when running git-gc. Here are numbers with a commit-graph: Test HEAD^ HEAD ------------------------------------------------------------------------ 5310.7: rev-list (commits) 0.70(0.58+0.12) 0.51(0.46+0.04) -27.1% 5310.8: rev-list (objects) 6.20(6.09+0.10) 6.27(6.16+0.11) +1.1% Still an improvement, but a lot less impressive. We could have the perf script remove any commit-graph to show the out-sized effect, but it probably makes sense to leave it in what would be a more typical setup. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 3 ++- builtin/rev-list.c | 9 +-------- pack-bitmap.c | 20 +++++++++++++++----- pack-bitmap.h | 1 + reachable.c | 2 +- t/perf/p5310-pack-bitmaps.sh | 8 ++++++++ t/t5310-pack-bitmaps.sh | 6 ++++++ 7 files changed, 34 insertions(+), 15 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 393c20a2d7..06915ebe7f 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3054,7 +3054,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs) display_progress(progress_state, nr_result); } - traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap); + traverse_bitmap_commit_list(bitmap_git, revs, + &add_object_entry_from_bitmap); return 0; } diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 70f3207ecc..937324cef0 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -434,18 +434,11 @@ static int try_bitmap_traversal(struct rev_info *revs) if (revs->max_count >= 0) return -1; - /* - * Our bitmap result will return all objects, and we're not - * yet prepared to show only particular types. - */ - if (!revs->tag_objects || !revs->tree_objects || !revs->blob_objects) - return -1; - bitmap_git = prepare_bitmap_walk(revs); if (!bitmap_git) return -1; - traverse_bitmap_commit_list(bitmap_git, &show_object_fast); + traverse_bitmap_commit_list(bitmap_git, revs, &show_object_fast); free_bitmap_index(bitmap_git); return 0; } diff --git a/pack-bitmap.c b/pack-bitmap.c index a97b717e55..2fbc748b19 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -599,6 +599,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, } static void show_extended_objects(struct bitmap_index *bitmap_git, + struct rev_info *revs, show_reachable_fn show_reach) { struct bitmap *objects = bitmap_git->result; @@ -612,6 +613,11 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, continue; obj = eindex->objects[i]; + if ((obj->type == OBJ_BLOB && !revs->blob_objects) || + (obj->type == OBJ_TREE && !revs->tree_objects) || + (obj->type == OBJ_TAG && !revs->tag_objects)) + continue; + show_reach(&obj->oid, obj->type, 0, eindex->hashes[i], NULL, 0); } } @@ -872,16 +878,20 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, } void traverse_bitmap_commit_list(struct bitmap_index *bitmap_git, + struct rev_info *revs, show_reachable_fn show_reachable) { assert(bitmap_git->result); show_objects_for_type(bitmap_git, OBJ_COMMIT, show_reachable); - show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable); - show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable); - show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable); - - show_extended_objects(bitmap_git, show_reachable); + if (revs->tree_objects) + show_objects_for_type(bitmap_git, OBJ_TREE, show_reachable); + if (revs->blob_objects) + show_objects_for_type(bitmap_git, OBJ_BLOB, show_reachable); + if (revs->tag_objects) + show_objects_for_type(bitmap_git, OBJ_TAG, show_reachable); + + show_extended_objects(bitmap_git, revs, show_reachable); } static uint32_t count_object_type(struct bitmap_index *bitmap_git, diff --git a/pack-bitmap.h b/pack-bitmap.h index 466c5afa09..b0c06a212e 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -44,6 +44,7 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r); void count_bitmap_commit_list(struct bitmap_index *, uint32_t *commits, uint32_t *trees, uint32_t *blobs, uint32_t *tags); void traverse_bitmap_commit_list(struct bitmap_index *, + struct rev_info *revs, show_reachable_fn show_reachable); void test_bitmap_walk(struct rev_info *revs); struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs); diff --git a/reachable.c b/reachable.c index 8f50235b28..0919f025c4 100644 --- a/reachable.c +++ b/reachable.c @@ -225,7 +225,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, bitmap_git = prepare_bitmap_walk(revs); if (bitmap_git) { - traverse_bitmap_commit_list(bitmap_git, mark_object_seen); + traverse_bitmap_commit_list(bitmap_git, revs, mark_object_seen); free_bitmap_index(bitmap_git); return; } diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 6a3a42531b..e52f66ec9e 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -39,6 +39,14 @@ test_perf 'pack to file (bitmap)' ' git pack-objects --use-bitmap-index --all pack1b /dev/null ' +test_perf 'rev-list (commits)' ' + git rev-list --all --use-bitmap-index >/dev/null +' + +test_perf 'rev-list (objects)' ' + git rev-list --all --use-bitmap-index --objects >/dev/null +' + test_expect_success 'create partial bitmap state' ' # pick a commit to represent the repo tip in the past cutoff=$(git rev-list HEAD~100 -1) && diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index b8645ae070..2c64d0c441 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -80,6 +80,12 @@ rev_list_tests() { test_cmp expect actual ' + test_expect_success "enumerate commits ($state)" ' + git rev-list --use-bitmap-index HEAD >actual && + git rev-list HEAD >expect && + test_bitmap_traversal --no-confirm-bitmaps expect actual + ' + test_expect_success "enumerate --objects ($state)" ' git rev-list --objects --use-bitmap-index HEAD >actual && git rev-list --objects HEAD >expect && -- cgit v1.2.3 From 6663ae0a0818aba5d4de289b1a37e1961ad6c367 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:29 -0500 Subject: pack-bitmap: basic noop bitmap filter infrastructure Currently you can't use object filters with bitmaps, but we plan to support at least some filters with bitmaps. Let's introduce some infrastructure that will help us do that: - prepare_bitmap_walk() now accepts a list_objects_filter_options parameter (which can be NULL for no filtering; all the current callers pass this) - we'll bail early if the filter is incompatible with bitmaps (just as we would if there were no bitmaps at all). Currently all filters are incompatible. - we'll filter the resulting bitmap; since there are no supported filters yet, this is always a noop. There should be no behavior change yet, but we'll support some actual filters in a future patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 2 +- builtin/rev-list.c | 4 ++-- pack-bitmap.c | 26 +++++++++++++++++++++++++- pack-bitmap.h | 4 +++- reachable.c | 2 +- 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 06915ebe7f..2bb81c2133 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3040,7 +3040,7 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - if (!(bitmap_git = prepare_bitmap_walk(revs))) + if (!(bitmap_git = prepare_bitmap_walk(revs, NULL))) return -1; if (pack_options_allow_reuse() && diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 937324cef0..6ff5e175fa 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -407,7 +407,7 @@ static int try_bitmap_count(struct rev_info *revs) */ max_count = revs->max_count; - bitmap_git = prepare_bitmap_walk(revs); + bitmap_git = prepare_bitmap_walk(revs, NULL); if (!bitmap_git) return -1; @@ -434,7 +434,7 @@ static int try_bitmap_traversal(struct rev_info *revs) if (revs->max_count >= 0) return -1; - bitmap_git = prepare_bitmap_walk(revs); + bitmap_git = prepare_bitmap_walk(revs, NULL); if (!bitmap_git) return -1; diff --git a/pack-bitmap.c b/pack-bitmap.c index 2fbc748b19..48c8694f92 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -12,6 +12,7 @@ #include "packfile.h" #include "repository.h" #include "object-store.h" +#include "list-objects-filter-options.h" /* * An entry on the bitmap index, representing the bitmap for a given @@ -711,7 +712,25 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, return 0; } -struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs) +static int filter_bitmap(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter, + struct list_objects_filter_options *filter) +{ + if (!filter || filter->choice == LOFC_DISABLED) + return 0; + + /* filter choice not handled */ + return -1; +} + +static int can_filter_bitmap(struct list_objects_filter_options *filter) +{ + return !filter_bitmap(NULL, NULL, NULL, filter); +} + +struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, + struct list_objects_filter_options *filter) { unsigned int i; @@ -731,6 +750,9 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs) if (revs->prune) return NULL; + if (!can_filter_bitmap(filter)) + return NULL; + /* try to open a bitmapped pack, but don't parse it yet * because we may not need to use it */ bitmap_git = xcalloc(1, sizeof(*bitmap_git)); @@ -800,6 +822,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs) if (haves_bitmap) bitmap_and_not(wants_bitmap, haves_bitmap); + filter_bitmap(bitmap_git, wants, wants_bitmap, filter); + bitmap_git->result = wants_bitmap; bitmap_git->haves = haves_bitmap; diff --git a/pack-bitmap.h b/pack-bitmap.h index b0c06a212e..956775d0bb 100644 --- a/pack-bitmap.h +++ b/pack-bitmap.h @@ -8,6 +8,7 @@ struct commit; struct repository; struct rev_info; +struct list_objects_filter_options; static const char BITMAP_IDX_SIGNATURE[] = {'B', 'I', 'T', 'M'}; @@ -47,7 +48,8 @@ void traverse_bitmap_commit_list(struct bitmap_index *, struct rev_info *revs, show_reachable_fn show_reachable); void test_bitmap_walk(struct rev_info *revs); -struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs); +struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, + struct list_objects_filter_options *filter); int reuse_partial_packfile_from_bitmap(struct bitmap_index *, struct packed_git **packfile, uint32_t *entries, off_t *up_to); diff --git a/reachable.c b/reachable.c index 0919f025c4..77a60c70a5 100644 --- a/reachable.c +++ b/reachable.c @@ -223,7 +223,7 @@ void mark_reachable_objects(struct rev_info *revs, int mark_reflog, cp.progress = progress; cp.count = 0; - bitmap_git = prepare_bitmap_walk(revs); + bitmap_git = prepare_bitmap_walk(revs, NULL); if (bitmap_git) { traverse_bitmap_commit_list(bitmap_git, revs, mark_object_seen); free_bitmap_index(bitmap_git); -- cgit v1.2.3 From 2aaeb9ac414d75f875efa968480db1ce85dc8dc5 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:32 -0500 Subject: rev-list: use bitmap filters for traversal This just passes the filter-options struct to prepare_bitmap_walk(). Since the bitmap code doesn't actually support any filters yet, it will fallback to the non-bitmap code if any --filter is specified. But this lets us exercise that rejection code path, as well as getting us ready to test filters via rev-list when we _do_ support them. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 6ff5e175fa..35e14ad2ed 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -372,7 +372,8 @@ static inline int parse_missing_action_value(const char *value) return 0; } -static int try_bitmap_count(struct rev_info *revs) +static int try_bitmap_count(struct rev_info *revs, + struct list_objects_filter_options *filter) { uint32_t commit_count = 0, tag_count = 0, @@ -407,7 +408,7 @@ static int try_bitmap_count(struct rev_info *revs) */ max_count = revs->max_count; - bitmap_git = prepare_bitmap_walk(revs, NULL); + bitmap_git = prepare_bitmap_walk(revs, filter); if (!bitmap_git) return -1; @@ -423,7 +424,8 @@ static int try_bitmap_count(struct rev_info *revs) return 0; } -static int try_bitmap_traversal(struct rev_info *revs) +static int try_bitmap_traversal(struct rev_info *revs, + struct list_objects_filter_options *filter) { struct bitmap_index *bitmap_git; @@ -434,7 +436,7 @@ static int try_bitmap_traversal(struct rev_info *revs) if (revs->max_count >= 0) return -1; - bitmap_git = prepare_bitmap_walk(revs, NULL); + bitmap_git = prepare_bitmap_walk(revs, filter); if (!bitmap_git) return -1; @@ -605,9 +607,6 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) (revs.left_right || revs.cherry_mark)) die(_("marked counting is incompatible with --objects")); - if (filter_options.choice) - use_bitmap_index = 0; - save_commit_buffer = (revs.verbose_header || revs.grep_filter.pattern_list || revs.grep_filter.header_list); @@ -618,9 +617,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) progress = start_delayed_progress(show_progress, 0); if (use_bitmap_index) { - if (!try_bitmap_count(&revs)) + if (!try_bitmap_count(&revs, &filter_options)) return 0; - if (!try_bitmap_traversal(&revs)) + if (!try_bitmap_traversal(&revs, &filter_options)) return 0; } -- cgit v1.2.3 From cc4aa28506e079e0c17cfbe78743530795803ea8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:34 -0500 Subject: bitmap: add bitmap_unset() function We've never needed to unset an individual bit in a bitmap until now. Typically they start with all bits unset and we bitmap_set() them, or we are applying another bitmap as a mask. But the easiest way to apply an object filter to a bitmap result will be to unset the individual bits. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- ewah/bitmap.c | 8 ++++++++ ewah/ewok.h | 1 + 2 files changed, 9 insertions(+) diff --git a/ewah/bitmap.c b/ewah/bitmap.c index 52f1178db4..1c31b3e249 100644 --- a/ewah/bitmap.c +++ b/ewah/bitmap.c @@ -45,6 +45,14 @@ void bitmap_set(struct bitmap *self, size_t pos) self->words[block] |= EWAH_MASK(pos); } +void bitmap_unset(struct bitmap *self, size_t pos) +{ + size_t block = EWAH_BLOCK(pos); + + if (block < self->word_alloc) + self->words[block] &= ~EWAH_MASK(pos); +} + int bitmap_get(struct bitmap *self, size_t pos) { size_t block = EWAH_BLOCK(pos); diff --git a/ewah/ewok.h b/ewah/ewok.h index 84b2a29faa..59f4ef7c4f 100644 --- a/ewah/ewok.h +++ b/ewah/ewok.h @@ -173,6 +173,7 @@ struct bitmap { struct bitmap *bitmap_new(void); void bitmap_set(struct bitmap *self, size_t pos); +void bitmap_unset(struct bitmap *self, size_t pos); int bitmap_get(struct bitmap *self, size_t pos); void bitmap_reset(struct bitmap *self); void bitmap_free(struct bitmap *self); -- cgit v1.2.3 From 4f3bd5606a02260274555f41fd7d6368f2bea1d8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:36 -0500 Subject: pack-bitmap: implement BLOB_NONE filtering We can easily support BLOB_NONE filters with bitmaps. Since we know the types of all of the objects, we just need to clear the result bits of any blobs. Note two subtleties in the implementation (which I also called out in comments): - we have to include any blobs that were specifically asked for (and not reached through graph traversal) to match the non-bitmap version - we have to handle in-pack and "ext_index" objects separately. Arguably prepare_bitmap_walk() could be adding these ext_index objects to the type bitmaps. But it doesn't for now, so let's match the rest of the bitmap code here (it probably wouldn't be an efficiency improvement to do so since the cost of extending those bitmaps is about the same as our loop here, but it might make the code a bit simpler). Here are perf results for the new test on git.git: Test HEAD^ HEAD -------------------------------------------------------------------------------- 5310.9: rev-list count with blob:none 1.67(1.62+0.05) 0.22(0.21+0.02) -86.8% Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap.c | 74 ++++++++++++++++++++++++++++++++++++++ t/perf/p5310-pack-bitmaps.sh | 5 +++ t/t6113-rev-list-bitmap-filters.sh | 14 ++++++++ 3 files changed, 93 insertions(+) diff --git a/pack-bitmap.c b/pack-bitmap.c index 48c8694f92..dcf8a9aadf 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -712,6 +712,73 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, return 0; } +static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, + struct object_list *tip_objects) +{ + struct bitmap *result = bitmap_new(); + struct object_list *p; + + for (p = tip_objects; p; p = p->next) { + int pos; + + if (p->item->type != OBJ_BLOB) + continue; + + pos = bitmap_position(bitmap_git, &p->item->oid); + if (pos < 0) + continue; + + bitmap_set(result, pos); + } + + return result; +} + +static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter) +{ + struct eindex *eindex = &bitmap_git->ext_index; + struct bitmap *tips; + struct ewah_iterator it; + eword_t mask; + uint32_t i; + + /* + * The non-bitmap version of this filter never removes + * blobs which the other side specifically asked for, + * so we must match that behavior. + */ + tips = find_tip_blobs(bitmap_git, tip_objects); + + /* + * We can use the blob type-bitmap to work in whole words + * for the objects that are actually in the bitmapped packfile. + */ + for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); + i < to_filter->word_alloc && ewah_iterator_next(&mask, &it); + i++) { + if (i < tips->word_alloc) + mask &= ~tips->words[i]; + to_filter->words[i] &= ~mask; + } + + /* + * Clear any blobs that weren't in the packfile (and so would not have + * been caught by the loop above. We'll have to check them + * individually. + */ + for (i = 0; i < eindex->count; i++) { + uint32_t pos = i + bitmap_git->pack->num_objects; + if (eindex->objects[i]->type == OBJ_BLOB && + bitmap_get(to_filter, pos) && + !bitmap_get(tips, pos)) + bitmap_unset(to_filter, pos); + } + + bitmap_free(tips); +} + static int filter_bitmap(struct bitmap_index *bitmap_git, struct object_list *tip_objects, struct bitmap *to_filter, @@ -720,6 +787,13 @@ static int filter_bitmap(struct bitmap_index *bitmap_git, if (!filter || filter->choice == LOFC_DISABLED) return 0; + if (filter->choice == LOFC_BLOB_NONE) { + if (bitmap_git) + filter_bitmap_blob_none(bitmap_git, tip_objects, + to_filter); + return 0; + } + /* filter choice not handled */ return -1; } diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index e52f66ec9e..936742314c 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -47,6 +47,11 @@ test_perf 'rev-list (objects)' ' git rev-list --all --use-bitmap-index --objects >/dev/null ' +test_perf 'rev-list count with blob:none' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=blob:none >/dev/null +' + test_expect_success 'create partial bitmap state' ' # pick a commit to represent the repo tip in the past cutoff=$(git rev-list HEAD~100 -1) && diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh index 977f8d0930..f4e6d582f0 100755 --- a/t/t6113-rev-list-bitmap-filters.sh +++ b/t/t6113-rev-list-bitmap-filters.sh @@ -21,4 +21,18 @@ test_expect_success 'filters fallback to non-bitmap traversal' ' test_cmp expect actual ' +test_expect_success 'blob:none filter' ' + git rev-list --objects --filter=blob:none HEAD >expect && + git rev-list --use-bitmap-index \ + --objects --filter=blob:none HEAD >actual && + test_bitmap_traversal expect actual +' + +test_expect_success 'blob:none filter with specified blob' ' + git rev-list --objects --filter=blob:none HEAD HEAD:two.t >expect && + git rev-list --use-bitmap-index \ + --objects --filter=blob:none HEAD HEAD:two.t >actual && + test_bitmap_traversal expect actual +' + test_done -- cgit v1.2.3 From 84243da1298890bd7370df66b754c2b252a08346 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:39 -0500 Subject: pack-bitmap: implement BLOB_LIMIT filtering Just as the previous commit implemented BLOB_NONE, we can support BLOB_LIMIT filters by looking at the sizes of any blobs in the result and unsetting their bits as appropriate. This is slightly more expensive than BLOB_NONE, but still produces a noticeable speedup (these results are on git.git): Test HEAD~2 HEAD ------------------------------------------------------------------------------------ 5310.9: rev-list count with blob:none 1.80(1.77+0.02) 0.22(0.20+0.02) -87.8% 5310.10: rev-list count with blob:limit=1k 1.99(1.96+0.03) 0.29(0.25+0.03) -85.4% The implementation is similar to the BLOB_NONE one, with the exception that we have to go object-by-object while walking the blob-type bitmap (since we can't mask out the matches, but must look up the size individually for each blob). The trick with using ctz64() is taken from show_objects_for_type(), which likewise needs to find individual bits (but wants to quickly skip over big chunks without blobs). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- pack-bitmap.c | 80 ++++++++++++++++++++++++++++++++++++++ t/perf/p5310-pack-bitmaps.sh | 5 +++ t/t6113-rev-list-bitmap-filters.sh | 20 +++++++++- 3 files changed, 104 insertions(+), 1 deletion(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index dcf8a9aadf..9d680065e4 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -779,6 +779,78 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, bitmap_free(tips); } +static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, + uint32_t pos) +{ + struct packed_git *pack = bitmap_git->pack; + unsigned long size; + struct object_info oi = OBJECT_INFO_INIT; + + oi.sizep = &size; + + if (pos < pack->num_objects) { + struct revindex_entry *entry = &pack->revindex[pos]; + if (packed_object_info(the_repository, pack, + entry->offset, &oi) < 0) { + struct object_id oid; + nth_packed_object_oid(&oid, pack, entry->nr); + die(_("unable to get size of %s"), oid_to_hex(&oid)); + } + } else { + struct eindex *eindex = &bitmap_git->ext_index; + struct object *obj = eindex->objects[pos - pack->num_objects]; + if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) + die(_("unable to get size of %s"), oid_to_hex(&obj->oid)); + } + + return size; +} + +static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter, + unsigned long limit) +{ + struct eindex *eindex = &bitmap_git->ext_index; + struct bitmap *tips; + struct ewah_iterator it; + eword_t mask; + uint32_t i; + + tips = find_tip_blobs(bitmap_git, tip_objects); + + for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); + i < to_filter->word_alloc && ewah_iterator_next(&mask, &it); + i++) { + eword_t word = to_filter->words[i] & mask; + unsigned offset; + + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + uint32_t pos; + + if ((word >> offset) == 0) + break; + offset += ewah_bit_ctz64(word >> offset); + pos = i * BITS_IN_EWORD + offset; + + if (!bitmap_get(tips, pos) && + get_size_by_pos(bitmap_git, pos) >= limit) + bitmap_unset(to_filter, pos); + } + } + + for (i = 0; i < eindex->count; i++) { + uint32_t pos = i + bitmap_git->pack->num_objects; + if (eindex->objects[i]->type == OBJ_BLOB && + bitmap_get(to_filter, pos) && + !bitmap_get(tips, pos) && + get_size_by_pos(bitmap_git, pos) >= limit) + bitmap_unset(to_filter, pos); + } + + bitmap_free(tips); +} + static int filter_bitmap(struct bitmap_index *bitmap_git, struct object_list *tip_objects, struct bitmap *to_filter, @@ -794,6 +866,14 @@ static int filter_bitmap(struct bitmap_index *bitmap_git, return 0; } + if (filter->choice == LOFC_BLOB_LIMIT) { + if (bitmap_git) + filter_bitmap_blob_limit(bitmap_git, tip_objects, + to_filter, + filter->blob_limit_value); + return 0; + } + /* filter choice not handled */ return -1; } diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 936742314c..8b43a545c1 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -52,6 +52,11 @@ test_perf 'rev-list count with blob:none' ' --filter=blob:none >/dev/null ' +test_perf 'rev-list count with blob:limit=1k' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=blob:limit=1k >/dev/null +' + test_expect_success 'create partial bitmap state' ' # pick a commit to represent the repo tip in the past cutoff=$(git rev-list HEAD~100 -1) && diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh index f4e6d582f0..145603f124 100755 --- a/t/t6113-rev-list-bitmap-filters.sh +++ b/t/t6113-rev-list-bitmap-filters.sh @@ -6,8 +6,10 @@ test_description='rev-list combining bitmaps and filters' test_expect_success 'set up bitmapped repo' ' # one commit will have bitmaps, the other will not test_commit one && + test_commit much-larger-blob-one && git repack -adb && - test_commit two + test_commit two && + test_commit much-larger-blob-two ' test_expect_success 'filters fallback to non-bitmap traversal' ' @@ -35,4 +37,20 @@ test_expect_success 'blob:none filter with specified blob' ' test_bitmap_traversal expect actual ' +test_expect_success 'blob:limit filter' ' + git rev-list --objects --filter=blob:limit=5 HEAD >expect && + git rev-list --use-bitmap-index \ + --objects --filter=blob:limit=5 HEAD >actual && + test_bitmap_traversal expect actual +' + +test_expect_success 'blob:limit filter with specified blob' ' + git rev-list --objects --filter=blob:limit=5 \ + HEAD HEAD:much-larger-blob-two.t >expect && + git rev-list --use-bitmap-index \ + --objects --filter=blob:limit=5 \ + HEAD HEAD:much-larger-blob-two.t >actual && + test_bitmap_traversal expect actual +' + test_done -- cgit v1.2.3 From 3ab3185f999f5d0d0079ac8246edb8fca5d9d3fd Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 14 Feb 2020 13:22:41 -0500 Subject: pack-objects: support filters with bitmaps Just as rev-list recently learned to combine filters and bitmaps, let's do the same for pack-objects. The infrastructure is all there; we just need to pass along our filter options, and the pack-bitmap code will decide to use bitmaps or not. This unsurprisingly makes things faster for partial clones of large repositories (here we're cloning linux.git): Test HEAD^ HEAD ------------------------------------------------------------------------------ 5310.11: simulated partial clone 38.94(37.28+5.87) 11.06(11.27+4.07) -71.6% Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 3 +-- t/perf/p5310-pack-bitmaps.sh | 4 ++++ t/t5310-pack-bitmaps.sh | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 2bb81c2133..6bc9bc1ce2 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3040,7 +3040,7 @@ static int pack_options_allow_reuse(void) static int get_object_list_from_bitmap(struct rev_info *revs) { - if (!(bitmap_git = prepare_bitmap_walk(revs, NULL))) + if (!(bitmap_git = prepare_bitmap_walk(revs, &filter_options))) return -1; if (pack_options_allow_reuse() && @@ -3419,7 +3419,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (filter_options.choice) { if (!pack_to_stdout) die(_("cannot use --filter without --stdout")); - use_bitmap_index = 0; } /* diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 8b43a545c1..7743f4f4c9 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -57,6 +57,10 @@ test_perf 'rev-list count with blob:limit=1k' ' --filter=blob:limit=1k >/dev/null ' +test_perf 'simulated partial clone' ' + git pack-objects --stdout --all --filter=blob:none /dev/null +' + test_expect_success 'create partial bitmap state' ' # pick a commit to represent the repo tip in the past cutoff=$(git rev-list HEAD~100 -1) && diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh index 2c64d0c441..8318781d2b 100755 --- a/t/t5310-pack-bitmaps.sh +++ b/t/t5310-pack-bitmaps.sh @@ -107,6 +107,20 @@ test_expect_success 'clone from bitmapped repository' ' test_cmp expect actual ' +test_expect_success 'partial clone from bitmapped repository' ' + test_config uploadpack.allowfilter true && + git clone --no-local --bare --filter=blob:none . partial-clone.git && + ( + cd partial-clone.git && + pack=$(echo objects/pack/*.pack) && + git verify-pack -v "$pack" >have && + awk "/blob/ { print \$1 }" blobs && + # we expect this single blob because of the direct ref + git rev-parse refs/tags/tagged-blob >expect && + test_cmp expect blobs + ) +' + test_expect_success 'setup further non-bitmapped commits' ' test_commit_bulk --id=further 10 ' -- cgit v1.2.3 From 20a5fd881a98cfe153fa5a81754994c7046a6e41 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 18 Feb 2020 13:21:46 -0800 Subject: rev-list --count: comment on the use of count_right++ Signed-off-by: Junio C Hamano --- builtin/rev-list.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 35e14ad2ed..f520111eda 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -262,6 +262,13 @@ static void show_object(struct object *obj, const char *name, void *cb_data) return; if (revs->count) { + /* + * The object count is always accumulated in the .count_right + * field for traversal that is not a left-right traversal, + * and cmd_rev_list() made sure that a .count request that + * wants to count non-commit objects, which is handled by + * the show_object() callback, does not ask for .left_right. + */ revs->count_right++; return; } -- cgit v1.2.3