diff options
author | Junio C Hamano <gitster@pobox.com> | 2019-12-16 13:08:31 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2019-12-16 13:08:32 -0800 |
commit | 6d831b8a3ec7dc06981e6347eb9a45849ef88665 (patch) | |
tree | fc092e735f665fb642eb9e03e5c5769bd3aa99c9 | |
parent | Merge branch 'js/builtin-add-i-cmds' (diff) | |
parent | packfile.c: speed up loading lots of packfiles (diff) | |
download | tgif-6d831b8a3ec7dc06981e6347eb9a45849ef88665.tar.xz |
Merge branch 'cs/store-packfiles-in-hashmap'
In a repository with many packfiles, the cost of the procedure that
avoids registering the same packfile twice was unnecessarily high
by using an inefficient search algorithm, which has been corrected.
* cs/store-packfiles-in-hashmap:
packfile.c: speed up loading lots of packfiles
-rw-r--r-- | object-store.h | 21 | ||||
-rw-r--r-- | object.c | 3 | ||||
-rw-r--r-- | packfile.c | 19 | ||||
-rwxr-xr-x | t/perf/p5303-many-packs.sh | 18 |
4 files changed, 52 insertions, 9 deletions
diff --git a/object-store.h b/object-store.h index 7f7b3cdd80..55ee639350 100644 --- a/object-store.h +++ b/object-store.h @@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb, void odb_clear_loose_cache(struct object_directory *odb); struct packed_git { + struct hashmap_entry packmap_ent; struct packed_git *next; struct list_head mru; struct pack_window *windows; @@ -88,6 +89,20 @@ struct packed_git { struct multi_pack_index; +static inline int pack_map_entry_cmp(const void *unused_cmp_data, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry2, + const void *keydata) +{ + const char *key = keydata; + const struct packed_git *pg1, *pg2; + + pg1 = container_of(entry, const struct packed_git, packmap_ent); + pg2 = container_of(entry2, const struct packed_git, packmap_ent); + + return strcmp(pg1->pack_name, key ? key : pg2->pack_name); +} + struct raw_object_store { /* * Set of all object directories; the main directory is first (and @@ -132,6 +147,12 @@ struct raw_object_store { struct list_head packed_git_mru; /* + * A map of packfiles to packed_git structs for tracking which + * packs have been loaded already. + */ + struct hashmap pack_map; + + /* * A fast, rough count of the number of objects in the repository. * These two fields are not meant for direct access. Use * approximate_object_count() instead. @@ -479,6 +479,7 @@ struct raw_object_store *raw_object_store_new(void) memset(o, 0, sizeof(*o)); INIT_LIST_HEAD(&o->packed_git_mru); + hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0); return o; } @@ -518,6 +519,8 @@ void raw_object_store_clear(struct raw_object_store *o) INIT_LIST_HEAD(&o->packed_git_mru); close_object_store(o); o->packed_git = NULL; + + hashmap_free(&o->pack_map); } void parsed_object_pool_clear(struct parsed_object_pool *o) diff --git a/packfile.c b/packfile.c index 355066de17..f0dc63e92f 100644 --- a/packfile.c +++ b/packfile.c @@ -757,6 +757,9 @@ void install_packed_git(struct repository *r, struct packed_git *pack) pack->next = r->objects->packed_git; r->objects->packed_git = pack; + + hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name)); + hashmap_add(&r->objects->pack_map, &pack->packmap_ent); } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -856,20 +859,18 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (strip_suffix_mem(full_name, &base_len, ".idx") && !(data->m && midx_contains_pack(data->m, file_name))) { - /* Don't reopen a pack we already have. */ - for (p = data->r->objects->packed_git; p; p = p->next) { - size_t len; - if (strip_suffix(p->pack_name, ".pack", &len) && - len == base_len && - !memcmp(p->pack_name, full_name, len)) - break; - } + struct hashmap_entry hent; + char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name); + unsigned int hash = strhash(pack_name); + hashmap_entry_init(&hent, hash); - if (!p) { + /* Don't reopen a pack we already have. */ + if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) { p = add_packed_git(full_name, full_name_len, data->local); if (p) install_packed_git(data->r, p); } + free(pack_name); } if (!report_garbage) diff --git a/t/perf/p5303-many-packs.sh b/t/perf/p5303-many-packs.sh index a369152c47..7ee791669a 100755 --- a/t/perf/p5303-many-packs.sh +++ b/t/perf/p5303-many-packs.sh @@ -85,4 +85,22 @@ do ' done +# Measure pack loading with 10,000 packs. +test_expect_success 'generate lots of packs' ' + for i in $(test_seq 10000); do + echo "blob" + echo "data <<EOF" + echo "blob $i" + echo "EOF" + echo "checkpoint" + done | + git -c fastimport.unpackLimit=0 fast-import +' + +# The purpose of this test is to evaluate load time for a large number +# of packs while doing as little other work as possible. +test_perf "load 10,000 packs" ' + git rev-parse --verify "HEAD^{commit}" +' + test_done |