From 5bf7f1eaa51b3f35161e1c9e4d8bc843330dea3c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 4 May 2020 17:12:27 -0600 Subject: list-objects-filter: treat NULL filter_options as "disabled" In most callers, we have an actual list_objects_filter_options struct, and if no filtering is desired its "choice" element will be LOFC_DISABLED. However, some code may have only a pointer to such a struct which may be NULL (because _their_ callers didn't care about filtering, either). Rather than forcing them to handle this explicitly like: if (filter_options) traverse_commit_list_filtered(filter_options, revs, show_commit, show_object, show_data, NULL); else traverse_commit_list(revs, show_commit, show_object, show_data); let's just treat a NULL filter_options the same as LOFC_DISABLED. We only need a small change, since that option struct is converted into a real filter only in the "init" function. Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- list-objects-filter.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/list-objects-filter.c b/list-objects-filter.c index 1e8d4e763d..0a3ef3cab3 100644 --- a/list-objects-filter.c +++ b/list-objects-filter.c @@ -663,6 +663,9 @@ struct filter *list_objects_filter__init( assert((sizeof(s_filters) / sizeof(s_filters[0])) == LOFC__COUNT); + if (!filter_options) + return NULL; + if (filter_options->choice >= LOFC__COUNT) BUG("invalid list-objects filter choice: %d", filter_options->choice); -- cgit v1.2.3 From 856e12c18a4616bbfbdfd1777577c1182b148519 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 4 May 2020 17:12:31 -0600 Subject: pack-bitmap.c: make object filtering functions generic In 4f3bd5606a (pack-bitmap: implement BLOB_NONE filtering, 2020-02-14), filtering support for bitmaps was added for the 'LOFC_BLOB_NONE' filter. In the future, we would like to add support for filters that behave as if they exclude a certain type of object, for e.g., the tree depth filter with depth 0. To prepare for this, make some of the functions used for filtering more generic, such as 'find_tip_blobs' and 'filter_bitmap_blob_none' so that they can work over arbitrary object types. To that end, create 'find_tip_objects' and 'filter_bitmap_exclude_type', and redefine the aforementioned functions in terms of those. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 49a8d10d0c..3693c9e62f 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -715,8 +715,9 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, return 0; } -static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, - struct object_list *tip_objects) +static struct bitmap *find_tip_objects(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + enum object_type type) { struct bitmap *result = bitmap_new(); struct object_list *p; @@ -724,7 +725,7 @@ static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, for (p = tip_objects; p; p = p->next) { int pos; - if (p->item->type != OBJ_BLOB) + if (p->item->type != type) continue; pos = bitmap_position(bitmap_git, &p->item->oid); @@ -737,9 +738,10 @@ static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, return result; } -static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, - struct object_list *tip_objects, - struct bitmap *to_filter) +static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter, + enum object_type type) { struct eindex *eindex = &bitmap_git->ext_index; struct bitmap *tips; @@ -747,18 +749,21 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, eword_t mask; uint32_t i; + if (type != OBJ_BLOB) + BUG("filter_bitmap_exclude_type: unsupported type '%d'", type); + /* * The non-bitmap version of this filter never removes - * blobs which the other side specifically asked for, + * objects which the other side specifically asked for, * so we must match that behavior. */ - tips = find_tip_blobs(bitmap_git, tip_objects); + tips = find_tip_objects(bitmap_git, tip_objects, type); /* * We can use the blob type-bitmap to work in whole words * for the objects that are actually in the bitmapped packfile. */ - for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); + for (i = 0, init_type_iterator(&it, bitmap_git, type); i < to_filter->word_alloc && ewah_iterator_next(&mask, &it); i++) { if (i < tips->word_alloc) @@ -773,7 +778,7 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, */ for (i = 0; i < eindex->count; i++) { uint32_t pos = i + bitmap_git->pack->num_objects; - if (eindex->objects[i]->type == OBJ_BLOB && + if (eindex->objects[i]->type == type && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos)) bitmap_unset(to_filter, pos); @@ -782,6 +787,14 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, bitmap_free(tips); } +static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter) +{ + filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, + OBJ_BLOB); +} + static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, uint32_t pos) { @@ -820,7 +833,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, eword_t mask; uint32_t i; - tips = find_tip_blobs(bitmap_git, tip_objects); + tips = find_tip_objects(bitmap_git, tip_objects, OBJ_BLOB); for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); i < to_filter->word_alloc && ewah_iterator_next(&mask, &it); -- cgit v1.2.3 From b0a8d4820baa9966511ea9bd8c8b09dab087475f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 4 May 2020 17:12:35 -0600 Subject: pack-bitmap.c: support 'tree:0' filtering In the previous patch, we made it easy to define other filters that exclude all objects of a certain type. Use that in order to implement bitmap-level filtering for the '--filter=tree:' filter when 'n' is equal to 0. The general case is not helped by bitmaps, since for values of 'n > 0', the object filtering machinery requires a full-blown tree traversal in order to determine the depth of a given tree. Caching this is non-obvious, too, since the same tree object can have a different depth depending on the context (e.g., a tree was moved up in the directory hierarchy between two commits). But, the 'n = 0' case can be helped, and this patch does so. Running p5310.11 in this tree and on master with the kernel, we can see that this case is helped substantially: Test master this tree -------------------------------------------------------------------------------- 5310.11: rev-list count with tree:0 10.68(10.39+0.27) 0.06(0.04+0.01) -99.4% Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 25 ++++++++++++++++++++++++- t/perf/p5310-pack-bitmaps.sh | 5 +++++ t/t6113-rev-list-bitmap-filters.sh | 21 +++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 3693c9e62f..195ee8cad0 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -749,7 +749,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, eword_t mask; uint32_t i; - if (type != OBJ_BLOB) + if (type != OBJ_BLOB && type != OBJ_TREE) BUG("filter_bitmap_exclude_type: unsupported type '%d'", type); /* @@ -867,6 +867,20 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, bitmap_free(tips); } +static void filter_bitmap_tree_depth(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter, + unsigned long limit) +{ + if (limit) + BUG("filter_bitmap_tree_depth given non-zero limit"); + + filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, + OBJ_TREE); + filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, + OBJ_BLOB); +} + static int filter_bitmap(struct bitmap_index *bitmap_git, struct object_list *tip_objects, struct bitmap *to_filter, @@ -890,6 +904,15 @@ static int filter_bitmap(struct bitmap_index *bitmap_git, return 0; } + if (filter->choice == LOFC_TREE_DEPTH && + filter->tree_exclude_depth == 0) { + if (bitmap_git) + filter_bitmap_tree_depth(bitmap_git, tip_objects, + to_filter, + filter->tree_exclude_depth); + return 0; + } + /* filter choice not handled */ return -1; } diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 80c53edca7..75ccf9f4e3 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -53,6 +53,11 @@ test_perf 'rev-list count with blob:limit=1k' ' --filter=blob:limit=1k >/dev/null ' +test_perf 'rev-list count with tree:0' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=tree:0 >/dev/null +' + test_perf 'simulated partial clone' ' git pack-objects --stdout --all --filter=blob:none /dev/null ' diff --git a/t/t6113-rev-list-bitmap-filters.sh b/t/t6113-rev-list-bitmap-filters.sh index 145603f124..2b551e6fd0 100755 --- a/t/t6113-rev-list-bitmap-filters.sh +++ b/t/t6113-rev-list-bitmap-filters.sh @@ -53,4 +53,25 @@ test_expect_success 'blob:limit filter with specified blob' ' test_bitmap_traversal expect actual ' +test_expect_success 'tree:0 filter' ' + git rev-list --objects --filter=tree:0 HEAD >expect && + git rev-list --use-bitmap-index \ + --objects --filter=tree:0 HEAD >actual && + test_bitmap_traversal expect actual +' + +test_expect_success 'tree:0 filter with specified blob, tree' ' + git rev-list --objects --filter=tree:0 HEAD HEAD:two.t >expect && + git rev-list --use-bitmap-index \ + --objects --filter=tree:0 HEAD HEAD:two.t >actual && + test_bitmap_traversal expect actual +' + +test_expect_success 'tree:1 filter' ' + git rev-list --objects --filter=tree:1 HEAD >expect && + git rev-list --use-bitmap-index \ + --objects --filter=tree:1 HEAD >actual && + test_cmp expect actual +' + test_done -- cgit v1.2.3 From 9639474b6dd126bae29dba70e2fbe11169d60e20 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 4 May 2020 17:12:38 -0600 Subject: pack-bitmap: pass object filter to fill-in traversal Sometimes a bitmap traversal still has to walk some commits manually, because those commits aren't included in the bitmap packfile (e.g., due to a push or commit since the last full repack). If we're given an object filter, we don't pass it down to this traversal. It's not necessary for correctness because the bitmap code has its own filters to post-process the bitmap result (which it must, to filter out the objects that _are_ mentioned in the bitmapped packfile). And with blob filters, there was no performance reason to pass along those filters, either. The fill-in traversal could omit them from the result, but it wouldn't save us any time to do so, since we'd still have to walk each tree entry to see if it's a blob or not. But now that we support tree filters, there's opportunity for savings. A tree:depth=0 filter means we can avoid accessing trees entirely, since we know we won't them (or any of the subtrees or blobs they point to). The new test in p5310 shows this off (the "partial bitmap" state is one where HEAD~100 and its ancestors are all in a bitmapped pack, but HEAD~100..HEAD are not). Here are the results (run against linux.git): Test HEAD^ HEAD ------------------------------------------------------------------------------------------------- [...] 5310.16: rev-list with tree filter (partial bitmap) 0.19(0.17+0.02) 0.03(0.02+0.01) -84.2% The absolute number of savings isn't _huge_, but keep in mind that we only omitted 100 first-parent links (in the version of linux.git here, that's 894 actual commits). In a more pathological case, we might have a much larger proportion of non-bitmapped commits. I didn't bother creating such a case in the perf script because the setup is expensive, and this is plenty to show the savings as a percentage. Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 14 +++++++++----- t/perf/p5310-pack-bitmaps.sh | 5 +++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/pack-bitmap.c b/pack-bitmap.c index 195ee8cad0..4077e731e8 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -506,7 +506,8 @@ static int should_include(struct commit *commit, void *_data) static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, - struct bitmap *seen) + struct bitmap *seen, + struct list_objects_filter_options *filter) { struct bitmap *base = NULL; int needs_walk = 0; @@ -599,8 +600,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, show_data.bitmap_git = bitmap_git; show_data.base = base; - traverse_commit_list(revs, show_commit, show_object, - &show_data); + traverse_commit_list_filtered(filter, revs, + show_commit, show_object, + &show_data, NULL); } return base; @@ -999,7 +1001,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (haves) { revs->ignore_missing_links = 1; - haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); + haves_bitmap = find_objects(bitmap_git, revs, haves, NULL, + filter); reset_revision_walk(); revs->ignore_missing_links = 0; @@ -1007,7 +1010,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, BUG("failed to perform bitmap walk"); } - wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); + wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap, + filter); if (!wants_bitmap) BUG("failed to perform bitmap walk"); diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index 75ccf9f4e3..b3e725f031 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -91,4 +91,9 @@ test_perf 'pack to file (partial bitmap)' ' git pack-objects --use-bitmap-index --all pack2b /dev/null ' +test_perf 'rev-list with tree filter (partial bitmap)' ' + git rev-list --use-bitmap-index --count --objects --all \ + --filter=tree:0 >/dev/null +' + test_done -- cgit v1.2.3