diff options
Diffstat (limited to 'pack-bitmap-write.c')
-rw-r--r-- | pack-bitmap-write.c | 468 |
1 files changed, 306 insertions, 162 deletions
diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 5e998bdaa7..92460a6126 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -9,9 +9,10 @@ #include "pack-revindex.h" #include "pack.h" #include "pack-bitmap.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "pack-objects.h" #include "commit-reach.h" +#include "prio-queue.h" struct bitmapped_commit { struct commit *commit; @@ -29,7 +30,6 @@ struct bitmap_writer { struct ewah_bitmap *tags; kh_oid_map_t *bitmaps; - kh_oid_map_t *reused; struct packing_data *to_pack; struct bitmapped_commit *selected; @@ -110,10 +110,8 @@ void bitmap_writer_build_type_index(struct packing_data *to_pack, /** * Compute the actual bitmaps */ -static struct object **seen_objects; -static unsigned int seen_objects_nr, seen_objects_alloc; -static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitmap *reused) +static inline void push_bitmapped_commit(struct commit *commit) { if (writer.selected_nr >= writer.selected_alloc) { writer.selected_alloc = (writer.selected_alloc + 32) * 2; @@ -121,27 +119,12 @@ static inline void push_bitmapped_commit(struct commit *commit, struct ewah_bitm } writer.selected[writer.selected_nr].commit = commit; - writer.selected[writer.selected_nr].bitmap = reused; + writer.selected[writer.selected_nr].bitmap = NULL; writer.selected[writer.selected_nr].flags = 0; writer.selected_nr++; } -static inline void mark_as_seen(struct object *object) -{ - ALLOC_GROW(seen_objects, seen_objects_nr + 1, seen_objects_alloc); - seen_objects[seen_objects_nr++] = object; -} - -static inline void reset_all_seen(void) -{ - unsigned int i; - for (i = 0; i < seen_objects_nr; ++i) { - seen_objects[i]->flags &= ~(SEEN | ADDED | SHOWN); - } - seen_objects_nr = 0; -} - static uint32_t find_object_pos(const struct object_id *oid) { struct object_entry *entry = packlist_find(writer.to_pack, oid); @@ -154,60 +137,6 @@ static uint32_t find_object_pos(const struct object_id *oid) return oe_in_pack_pos(writer.to_pack, entry); } -static void show_object(struct object *object, const char *name, void *data) -{ - struct bitmap *base = data; - bitmap_set(base, find_object_pos(&object->oid)); - mark_as_seen(object); -} - -static void show_commit(struct commit *commit, void *data) -{ - mark_as_seen((struct object *)commit); -} - -static int -add_to_include_set(struct bitmap *base, struct commit *commit) -{ - khiter_t hash_pos; - uint32_t bitmap_pos = find_object_pos(&commit->object.oid); - - if (bitmap_get(base, bitmap_pos)) - return 0; - - hash_pos = kh_get_oid_map(writer.bitmaps, commit->object.oid); - if (hash_pos < kh_end(writer.bitmaps)) { - struct bitmapped_commit *bc = kh_value(writer.bitmaps, hash_pos); - bitmap_or_ewah(base, bc->bitmap); - return 0; - } - - bitmap_set(base, bitmap_pos); - return 1; -} - -static int -should_include(struct commit *commit, void *_data) -{ - struct bitmap *base = _data; - - if (!add_to_include_set(base, commit)) { - struct commit_list *parent = commit->parents; - - mark_as_seen((struct object *)commit); - - while (parent) { - parent->item->object.flags |= SEEN; - mark_as_seen((struct object *)parent->item); - parent = parent->next; - } - - return 0; - } - - return 1; -} - static void compute_xor_offsets(void) { static const int MAX_XOR_OFFSET_SEARCH = 10; @@ -248,79 +177,326 @@ static void compute_xor_offsets(void) } } -void bitmap_writer_build(struct packing_data *to_pack) -{ - static const double REUSE_BITMAP_THRESHOLD = 0.2; +struct bb_commit { + struct commit_list *reverse_edges; + struct bitmap *commit_mask; + struct bitmap *bitmap; + unsigned selected:1, + maximal:1; + unsigned idx; /* within selected array */ +}; - int i, reuse_after, need_reset; - struct bitmap *base = bitmap_new(); - struct rev_info revs; +define_commit_slab(bb_data, struct bb_commit); - writer.bitmaps = kh_init_oid_map(); - writer.to_pack = to_pack; +struct bitmap_builder { + struct bb_data data; + struct commit **commits; + size_t commits_nr, commits_alloc; +}; - if (writer.show_progress) - writer.progress = start_progress("Building bitmaps", writer.selected_nr); +static void bitmap_builder_init(struct bitmap_builder *bb, + struct bitmap_writer *writer, + struct bitmap_index *old_bitmap) +{ + struct rev_info revs; + struct commit *commit; + struct commit_list *reusable = NULL; + struct commit_list *r; + unsigned int i, num_maximal = 0; - repo_init_revisions(to_pack->repo, &revs, NULL); - revs.tag_objects = 1; - revs.tree_objects = 1; - revs.blob_objects = 1; - revs.no_walk = 0; + memset(bb, 0, sizeof(*bb)); + init_bb_data(&bb->data); - revs.include_check = should_include; reset_revision_walk(); + repo_init_revisions(writer->to_pack->repo, &revs, NULL); + revs.topo_order = 1; + revs.first_parent_only = 1; + + for (i = 0; i < writer->selected_nr; i++) { + struct commit *c = writer->selected[i].commit; + struct bb_commit *ent = bb_data_at(&bb->data, c); + + ent->selected = 1; + ent->maximal = 1; + ent->idx = i; + + ent->commit_mask = bitmap_new(); + bitmap_set(ent->commit_mask, i); + + add_pending_object(&revs, &c->object, ""); + } + + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + + while ((commit = get_revision(&revs))) { + struct commit_list *p = commit->parents; + struct bb_commit *c_ent; + + parse_commit_or_die(commit); + + c_ent = bb_data_at(&bb->data, commit); + + /* + * If there is no commit_mask, there is no reason to iterate + * over this commit; it is not selected (if it were, it would + * not have a blank commit mask) and all its children have + * existing bitmaps (see the comment starting with "This commit + * has an existing bitmap" below), so it does not contribute + * anything to the final bitmap file or its descendants. + */ + if (!c_ent->commit_mask) + continue; + + if (old_bitmap && bitmap_for_commit(old_bitmap, commit)) { + /* + * This commit has an existing bitmap, so we can + * get its bits immediately without an object + * walk. That is, it is reusable as-is and there is no + * need to continue walking beyond it. + * + * Mark it as such and add it to bb->commits separately + * to avoid allocating a position in the commit mask. + */ + commit_list_insert(commit, &reusable); + goto next; + } + + if (c_ent->maximal) { + num_maximal++; + ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc); + bb->commits[bb->commits_nr++] = commit; + } + + if (p) { + struct bb_commit *p_ent = bb_data_at(&bb->data, p->item); + int c_not_p, p_not_c; + + if (!p_ent->commit_mask) { + p_ent->commit_mask = bitmap_new(); + c_not_p = 1; + p_not_c = 0; + } else { + c_not_p = bitmap_is_subset(c_ent->commit_mask, p_ent->commit_mask); + p_not_c = bitmap_is_subset(p_ent->commit_mask, c_ent->commit_mask); + } + + if (!c_not_p) + continue; + + bitmap_or(p_ent->commit_mask, c_ent->commit_mask); + + if (p_not_c) + p_ent->maximal = 1; + else { + p_ent->maximal = 0; + free_commit_list(p_ent->reverse_edges); + p_ent->reverse_edges = NULL; + } - reuse_after = writer.selected_nr * REUSE_BITMAP_THRESHOLD; - need_reset = 0; + if (c_ent->maximal) { + commit_list_insert(commit, &p_ent->reverse_edges); + } else { + struct commit_list *cc = c_ent->reverse_edges; + + for (; cc; cc = cc->next) { + if (!commit_list_contains(cc->item, p_ent->reverse_edges)) + commit_list_insert(cc->item, &p_ent->reverse_edges); + } + } + } + +next: + bitmap_free(c_ent->commit_mask); + c_ent->commit_mask = NULL; + } + + for (r = reusable; r; r = r->next) { + ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc); + bb->commits[bb->commits_nr++] = r->item; + } + + trace2_data_intmax("pack-bitmap-write", the_repository, + "num_selected_commits", writer->selected_nr); + trace2_data_intmax("pack-bitmap-write", the_repository, + "num_maximal_commits", num_maximal); + + free_commit_list(reusable); +} + +static void bitmap_builder_clear(struct bitmap_builder *bb) +{ + clear_bb_data(&bb->data); + free(bb->commits); + bb->commits_nr = bb->commits_alloc = 0; +} + +static void fill_bitmap_tree(struct bitmap *bitmap, + struct tree *tree) +{ + uint32_t pos; + struct tree_desc desc; + struct name_entry entry; + + /* + * If our bit is already set, then there is nothing to do. Both this + * tree and all of its children will be set. + */ + pos = find_object_pos(&tree->object.oid); + if (bitmap_get(bitmap, pos)) + return; + bitmap_set(bitmap, pos); - for (i = writer.selected_nr - 1; i >= 0; --i) { - struct bitmapped_commit *stored; - struct object *object; + if (parse_tree(tree) < 0) + die("unable to load tree object %s", + oid_to_hex(&tree->object.oid)); + init_tree_desc(&desc, tree->buffer, tree->size); - khiter_t hash_pos; - int hash_ret; + while (tree_entry(&desc, &entry)) { + switch (object_type(entry.mode)) { + case OBJ_TREE: + fill_bitmap_tree(bitmap, + lookup_tree(the_repository, &entry.oid)); + break; + case OBJ_BLOB: + bitmap_set(bitmap, find_object_pos(&entry.oid)); + break; + default: + /* Gitlink, etc; not reachable */ + break; + } + } - stored = &writer.selected[i]; - object = (struct object *)stored->commit; + free_tree_buffer(tree); +} - if (stored->bitmap == NULL) { - if (i < writer.selected_nr - 1 && - (need_reset || - !in_merge_bases(writer.selected[i + 1].commit, - stored->commit))) { - bitmap_reset(base); - reset_all_seen(); +static void fill_bitmap_commit(struct bb_commit *ent, + struct commit *commit, + struct prio_queue *queue, + struct prio_queue *tree_queue, + struct bitmap_index *old_bitmap, + const uint32_t *mapping) +{ + if (!ent->bitmap) + ent->bitmap = bitmap_new(); + + prio_queue_put(queue, commit); + + while (queue->nr) { + struct commit_list *p; + struct commit *c = prio_queue_get(queue); + + if (old_bitmap && mapping) { + struct ewah_bitmap *old = bitmap_for_commit(old_bitmap, c); + /* + * If this commit has an old bitmap, then translate that + * bitmap and add its bits to this one. No need to walk + * parents or the tree for this commit. + */ + if (old && !rebuild_bitmap(mapping, old, ent->bitmap)) + continue; + } + + /* + * Mark ourselves and queue our tree. The commit + * walk ensures we cover all parents. + */ + bitmap_set(ent->bitmap, find_object_pos(&c->object.oid)); + prio_queue_put(tree_queue, get_commit_tree(c)); + + for (p = c->parents; p; p = p->next) { + int pos = find_object_pos(&p->item->object.oid); + if (!bitmap_get(ent->bitmap, pos)) { + bitmap_set(ent->bitmap, pos); + prio_queue_put(queue, p->item); } + } + } - add_pending_object(&revs, object, ""); - revs.include_check_data = base; + while (tree_queue->nr) + fill_bitmap_tree(ent->bitmap, prio_queue_get(tree_queue)); +} - if (prepare_revision_walk(&revs)) - die("revision walk setup failed"); +static void store_selected(struct bb_commit *ent, struct commit *commit) +{ + struct bitmapped_commit *stored = &writer.selected[ent->idx]; + khiter_t hash_pos; + int hash_ret; - traverse_commit_list(&revs, show_commit, show_object, base); + stored->bitmap = bitmap_to_ewah(ent->bitmap); - object_array_clear(&revs.pending); + hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret); + if (hash_ret == 0) + die("Duplicate entry when writing index: %s", + oid_to_hex(&commit->object.oid)); + kh_value(writer.bitmaps, hash_pos) = stored; +} - stored->bitmap = bitmap_to_ewah(base); - need_reset = 0; - } else - need_reset = 1; +void bitmap_writer_build(struct packing_data *to_pack) +{ + struct bitmap_builder bb; + size_t i; + int nr_stored = 0; /* for progress */ + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + struct prio_queue tree_queue = { NULL }; + struct bitmap_index *old_bitmap; + uint32_t *mapping; - if (i >= reuse_after) - stored->flags |= BITMAP_FLAG_REUSE; + writer.bitmaps = kh_init_oid_map(); + writer.to_pack = to_pack; - hash_pos = kh_put_oid_map(writer.bitmaps, object->oid, &hash_ret); - if (hash_ret == 0) - die("Duplicate entry when writing index: %s", - oid_to_hex(&object->oid)); + if (writer.show_progress) + writer.progress = start_progress("Building bitmaps", writer.selected_nr); + trace2_region_enter("pack-bitmap-write", "building_bitmaps_total", + the_repository); + + old_bitmap = prepare_bitmap_git(to_pack->repo); + if (old_bitmap) + mapping = create_bitmap_mapping(old_bitmap, to_pack); + else + mapping = NULL; + + bitmap_builder_init(&bb, &writer, old_bitmap); + for (i = bb.commits_nr; i > 0; i--) { + struct commit *commit = bb.commits[i-1]; + struct bb_commit *ent = bb_data_at(&bb.data, commit); + struct commit *child; + int reused = 0; + + fill_bitmap_commit(ent, commit, &queue, &tree_queue, + old_bitmap, mapping); + + if (ent->selected) { + store_selected(ent, commit); + nr_stored++; + display_progress(writer.progress, nr_stored); + } - kh_value(writer.bitmaps, hash_pos) = stored; - display_progress(writer.progress, writer.selected_nr - i); + while ((child = pop_commit(&ent->reverse_edges))) { + struct bb_commit *child_ent = + bb_data_at(&bb.data, child); + + if (child_ent->bitmap) + bitmap_or(child_ent->bitmap, ent->bitmap); + else if (reused) + child_ent->bitmap = bitmap_dup(ent->bitmap); + else { + child_ent->bitmap = ent->bitmap; + reused = 1; + } + } + if (!reused) + bitmap_free(ent->bitmap); + ent->bitmap = NULL; } + clear_prio_queue(&queue); + clear_prio_queue(&tree_queue); + bitmap_builder_clear(&bb); + free(mapping); + + trace2_region_leave("pack-bitmap-write", "building_bitmaps_total", + the_repository); - bitmap_free(base); stop_progress(&writer.progress); compute_xor_offsets(); @@ -360,35 +536,6 @@ static int date_compare(const void *_a, const void *_b) return (long)b->date - (long)a->date; } -void bitmap_writer_reuse_bitmaps(struct packing_data *to_pack) -{ - struct bitmap_index *bitmap_git; - if (!(bitmap_git = prepare_bitmap_git(to_pack->repo))) - return; - - writer.reused = kh_init_oid_map(); - rebuild_existing_bitmaps(bitmap_git, to_pack, writer.reused, - writer.show_progress); - /* - * NEEDSWORK: rebuild_existing_bitmaps() makes writer.reused reference - * some bitmaps in bitmap_git, so we can't free the latter. - */ -} - -static struct ewah_bitmap *find_reused_bitmap(const struct object_id *oid) -{ - khiter_t hash_pos; - - if (!writer.reused) - return NULL; - - hash_pos = kh_get_oid_map(writer.reused, *oid); - if (hash_pos >= kh_end(writer.reused)) - return NULL; - - return kh_value(writer.reused, hash_pos); -} - void bitmap_writer_select_commits(struct commit **indexed_commits, unsigned int indexed_commits_nr, int max_bitmaps) @@ -402,12 +549,11 @@ void bitmap_writer_select_commits(struct commit **indexed_commits, if (indexed_commits_nr < 100) { for (i = 0; i < indexed_commits_nr; ++i) - push_bitmapped_commit(indexed_commits[i], NULL); + push_bitmapped_commit(indexed_commits[i]); return; } for (;;) { - struct ewah_bitmap *reused_bitmap = NULL; struct commit *chosen = NULL; next = next_commit_index(i); @@ -422,15 +568,13 @@ void bitmap_writer_select_commits(struct commit **indexed_commits, if (next == 0) { chosen = indexed_commits[i]; - reused_bitmap = find_reused_bitmap(&chosen->object.oid); } else { chosen = indexed_commits[i + next]; for (j = 0; j <= next; ++j) { struct commit *cm = indexed_commits[i + j]; - reused_bitmap = find_reused_bitmap(&cm->object.oid); - if (reused_bitmap || (cm->object.flags & NEEDS_BITMAP) != 0) { + if ((cm->object.flags & NEEDS_BITMAP) != 0) { chosen = cm; break; } @@ -440,7 +584,7 @@ void bitmap_writer_select_commits(struct commit **indexed_commits, } } - push_bitmapped_commit(chosen, reused_bitmap); + push_bitmapped_commit(chosen); i += next + 1; display_progress(writer.progress, i); @@ -482,7 +626,7 @@ static void write_selected_commits_v1(struct hashfile *f, struct bitmapped_commit *stored = &writer.selected[i]; int commit_pos = - sha1_pos(stored->commit->object.oid.hash, index, index_nr, sha1_access); + hash_pos(stored->commit->object.oid.hash, index, index_nr, sha1_access); if (commit_pos < 0) BUG("trying to write commit not in index"); |