From 856e12c18a4616bbfbdfd1777577c1182b148519 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 4 May 2020 17:12:31 -0600 Subject: pack-bitmap.c: make object filtering functions generic In 4f3bd5606a (pack-bitmap: implement BLOB_NONE filtering, 2020-02-14), filtering support for bitmaps was added for the 'LOFC_BLOB_NONE' filter. In the future, we would like to add support for filters that behave as if they exclude a certain type of object, for e.g., the tree depth filter with depth 0. To prepare for this, make some of the functions used for filtering more generic, such as 'find_tip_blobs' and 'filter_bitmap_blob_none' so that they can work over arbitrary object types. To that end, create 'find_tip_objects' and 'filter_bitmap_exclude_type', and redefine the aforementioned functions in terms of those. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'pack-bitmap.c') diff --git a/pack-bitmap.c b/pack-bitmap.c index 49a8d10d0c..3693c9e62f 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -715,8 +715,9 @@ static int in_bitmapped_pack(struct bitmap_index *bitmap_git, return 0; } -static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, - struct object_list *tip_objects) +static struct bitmap *find_tip_objects(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + enum object_type type) { struct bitmap *result = bitmap_new(); struct object_list *p; @@ -724,7 +725,7 @@ static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, for (p = tip_objects; p; p = p->next) { int pos; - if (p->item->type != OBJ_BLOB) + if (p->item->type != type) continue; pos = bitmap_position(bitmap_git, &p->item->oid); @@ -737,9 +738,10 @@ static struct bitmap *find_tip_blobs(struct bitmap_index *bitmap_git, return result; } -static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, - struct object_list *tip_objects, - struct bitmap *to_filter) +static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter, + enum object_type type) { struct eindex *eindex = &bitmap_git->ext_index; struct bitmap *tips; @@ -747,18 +749,21 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, eword_t mask; uint32_t i; + if (type != OBJ_BLOB) + BUG("filter_bitmap_exclude_type: unsupported type '%d'", type); + /* * The non-bitmap version of this filter never removes - * blobs which the other side specifically asked for, + * objects which the other side specifically asked for, * so we must match that behavior. */ - tips = find_tip_blobs(bitmap_git, tip_objects); + tips = find_tip_objects(bitmap_git, tip_objects, type); /* * We can use the blob type-bitmap to work in whole words * for the objects that are actually in the bitmapped packfile. */ - for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); + for (i = 0, init_type_iterator(&it, bitmap_git, type); i < to_filter->word_alloc && ewah_iterator_next(&mask, &it); i++) { if (i < tips->word_alloc) @@ -773,7 +778,7 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, */ for (i = 0; i < eindex->count; i++) { uint32_t pos = i + bitmap_git->pack->num_objects; - if (eindex->objects[i]->type == OBJ_BLOB && + if (eindex->objects[i]->type == type && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos)) bitmap_unset(to_filter, pos); @@ -782,6 +787,14 @@ static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, bitmap_free(tips); } +static void filter_bitmap_blob_none(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter) +{ + filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, + OBJ_BLOB); +} + static unsigned long get_size_by_pos(struct bitmap_index *bitmap_git, uint32_t pos) { @@ -820,7 +833,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, eword_t mask; uint32_t i; - tips = find_tip_blobs(bitmap_git, tip_objects); + tips = find_tip_objects(bitmap_git, tip_objects, OBJ_BLOB); for (i = 0, init_type_iterator(&it, bitmap_git, OBJ_BLOB); i < to_filter->word_alloc && ewah_iterator_next(&mask, &it); -- cgit v1.2.3 From b0a8d4820baa9966511ea9bd8c8b09dab087475f Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Mon, 4 May 2020 17:12:35 -0600 Subject: pack-bitmap.c: support 'tree:0' filtering In the previous patch, we made it easy to define other filters that exclude all objects of a certain type. Use that in order to implement bitmap-level filtering for the '--filter=tree:' filter when 'n' is equal to 0. The general case is not helped by bitmaps, since for values of 'n > 0', the object filtering machinery requires a full-blown tree traversal in order to determine the depth of a given tree. Caching this is non-obvious, too, since the same tree object can have a different depth depending on the context (e.g., a tree was moved up in the directory hierarchy between two commits). But, the 'n = 0' case can be helped, and this patch does so. Running p5310.11 in this tree and on master with the kernel, we can see that this case is helped substantially: Test master this tree -------------------------------------------------------------------------------- 5310.11: rev-list count with tree:0 10.68(10.39+0.27) 0.06(0.04+0.01) -99.4% Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'pack-bitmap.c') diff --git a/pack-bitmap.c b/pack-bitmap.c index 3693c9e62f..195ee8cad0 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -749,7 +749,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, eword_t mask; uint32_t i; - if (type != OBJ_BLOB) + if (type != OBJ_BLOB && type != OBJ_TREE) BUG("filter_bitmap_exclude_type: unsupported type '%d'", type); /* @@ -867,6 +867,20 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, bitmap_free(tips); } +static void filter_bitmap_tree_depth(struct bitmap_index *bitmap_git, + struct object_list *tip_objects, + struct bitmap *to_filter, + unsigned long limit) +{ + if (limit) + BUG("filter_bitmap_tree_depth given non-zero limit"); + + filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, + OBJ_TREE); + filter_bitmap_exclude_type(bitmap_git, tip_objects, to_filter, + OBJ_BLOB); +} + static int filter_bitmap(struct bitmap_index *bitmap_git, struct object_list *tip_objects, struct bitmap *to_filter, @@ -890,6 +904,15 @@ static int filter_bitmap(struct bitmap_index *bitmap_git, return 0; } + if (filter->choice == LOFC_TREE_DEPTH && + filter->tree_exclude_depth == 0) { + if (bitmap_git) + filter_bitmap_tree_depth(bitmap_git, tip_objects, + to_filter, + filter->tree_exclude_depth); + return 0; + } + /* filter choice not handled */ return -1; } -- cgit v1.2.3 From 9639474b6dd126bae29dba70e2fbe11169d60e20 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 4 May 2020 17:12:38 -0600 Subject: pack-bitmap: pass object filter to fill-in traversal Sometimes a bitmap traversal still has to walk some commits manually, because those commits aren't included in the bitmap packfile (e.g., due to a push or commit since the last full repack). If we're given an object filter, we don't pass it down to this traversal. It's not necessary for correctness because the bitmap code has its own filters to post-process the bitmap result (which it must, to filter out the objects that _are_ mentioned in the bitmapped packfile). And with blob filters, there was no performance reason to pass along those filters, either. The fill-in traversal could omit them from the result, but it wouldn't save us any time to do so, since we'd still have to walk each tree entry to see if it's a blob or not. But now that we support tree filters, there's opportunity for savings. A tree:depth=0 filter means we can avoid accessing trees entirely, since we know we won't them (or any of the subtrees or blobs they point to). The new test in p5310 shows this off (the "partial bitmap" state is one where HEAD~100 and its ancestors are all in a bitmapped pack, but HEAD~100..HEAD are not). Here are the results (run against linux.git): Test HEAD^ HEAD ------------------------------------------------------------------------------------------------- [...] 5310.16: rev-list with tree filter (partial bitmap) 0.19(0.17+0.02) 0.03(0.02+0.01) -84.2% The absolute number of savings isn't _huge_, but keep in mind that we only omitted 100 first-parent links (in the version of linux.git here, that's 894 actual commits). In a more pathological case, we might have a much larger proportion of non-bitmapped commits. I didn't bother creating such a case in the perf script because the setup is expensive, and this is plenty to show the savings as a percentage. Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- pack-bitmap.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'pack-bitmap.c') diff --git a/pack-bitmap.c b/pack-bitmap.c index 195ee8cad0..4077e731e8 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -506,7 +506,8 @@ static int should_include(struct commit *commit, void *_data) static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, - struct bitmap *seen) + struct bitmap *seen, + struct list_objects_filter_options *filter) { struct bitmap *base = NULL; int needs_walk = 0; @@ -599,8 +600,9 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, show_data.bitmap_git = bitmap_git; show_data.base = base; - traverse_commit_list(revs, show_commit, show_object, - &show_data); + traverse_commit_list_filtered(filter, revs, + show_commit, show_object, + &show_data, NULL); } return base; @@ -999,7 +1001,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (haves) { revs->ignore_missing_links = 1; - haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); + haves_bitmap = find_objects(bitmap_git, revs, haves, NULL, + filter); reset_revision_walk(); revs->ignore_missing_links = 0; @@ -1007,7 +1010,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, BUG("failed to perform bitmap walk"); } - wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); + wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap, + filter); if (!wants_bitmap) BUG("failed to perform bitmap walk"); -- cgit v1.2.3