summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com>, 2019-12-16 13:08:31 -0800
committer: Junio C Hamano <gitster@pobox.com>, 2019-12-16 13:08:32 -0800
commit: 6d831b8a3ec7dc06981e6347eb9a45849ef88665 (patch)
tree: fc092e735f665fb642eb9e03e5c5769bd3aa99c9
parent: Merge branch 'js/builtin-add-i-cmds' (diff)
parent: packfile.c: speed up loading lots of packfiles (diff)
download: tgif-6d831b8a3ec7dc06981e6347eb9a45849ef88665.tar.xz
Merge branch 'cs/store-packfiles-in-hashmap'
In a repository with many packfiles, the cost of the procedure that avoids registering the same packfile twice was unnecessarily high because it used an inefficient search algorithm, which has been corrected.

* cs/store-packfiles-in-hashmap:
  packfile.c: speed up loading lots of packfiles
-rw-r--r-- object-store.h 21
-rw-r--r-- object.c 3
-rw-r--r-- packfile.c 19
-rwxr-xr-x t/perf/p5303-many-packs.sh 18
4 files changed, 52 insertions, 9 deletions
diff --git a/object-store.h b/object-store.h
index 7f7b3cdd80..55ee639350 100644
--- a/object-store.h
+++ b/object-store.h
@@ -60,6 +60,7 @@ struct oid_array *odb_loose_cache(struct object_directory *odb,
void odb_clear_loose_cache(struct object_directory *odb);
struct packed_git {
+ struct hashmap_entry packmap_ent;
struct packed_git *next;
struct list_head mru;
struct pack_window *windows;
@@ -88,6 +89,20 @@ struct packed_git {
struct multi_pack_index;
+static inline int pack_map_entry_cmp(const void *unused_cmp_data,
+ const struct hashmap_entry *entry,
+ const struct hashmap_entry *entry2,
+ const void *keydata)
+{
+ const char *key = keydata;
+ const struct packed_git *pg1, *pg2;
+
+ pg1 = container_of(entry, const struct packed_git, packmap_ent);
+ pg2 = container_of(entry2, const struct packed_git, packmap_ent);
+
+ return strcmp(pg1->pack_name, key ? key : pg2->pack_name);
+}
+
struct raw_object_store {
/*
* Set of all object directories; the main directory is first (and
@@ -132,6 +147,12 @@ struct raw_object_store {
struct list_head packed_git_mru;
/*
+ * A map of packfiles to packed_git structs for tracking which
+ * packs have been loaded already.
+ */
+ struct hashmap pack_map;
+
+ /*
* A fast, rough count of the number of objects in the repository.
* These two fields are not meant for direct access. Use
* approximate_object_count() instead.
diff --git a/object.c b/object.c
index 3b8b8c55c9..142ef69399 100644
--- a/object.c
+++ b/object.c
@@ -479,6 +479,7 @@ struct raw_object_store *raw_object_store_new(void)
memset(o, 0, sizeof(*o));
INIT_LIST_HEAD(&o->packed_git_mru);
+ hashmap_init(&o->pack_map, pack_map_entry_cmp, NULL, 0);
return o;
}
@@ -518,6 +519,8 @@ void raw_object_store_clear(struct raw_object_store *o)
INIT_LIST_HEAD(&o->packed_git_mru);
close_object_store(o);
o->packed_git = NULL;
+
+ hashmap_free(&o->pack_map);
}
void parsed_object_pool_clear(struct parsed_object_pool *o)
diff --git a/packfile.c b/packfile.c
index 355066de17..f0dc63e92f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -757,6 +757,9 @@ void install_packed_git(struct repository *r, struct packed_git *pack)
pack->next = r->objects->packed_git;
r->objects->packed_git = pack;
+
+ hashmap_entry_init(&pack->packmap_ent, strhash(pack->pack_name));
+ hashmap_add(&r->objects->pack_map, &pack->packmap_ent);
}
void (*report_garbage)(unsigned seen_bits, const char *path);
@@ -856,20 +859,18 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
if (strip_suffix_mem(full_name, &base_len, ".idx") &&
!(data->m && midx_contains_pack(data->m, file_name))) {
- /* Don't reopen a pack we already have. */
- for (p = data->r->objects->packed_git; p; p = p->next) {
- size_t len;
- if (strip_suffix(p->pack_name, ".pack", &len) &&
- len == base_len &&
- !memcmp(p->pack_name, full_name, len))
- break;
- }
+ struct hashmap_entry hent;
+ char *pack_name = xstrfmt("%.*s.pack", (int)base_len, full_name);
+ unsigned int hash = strhash(pack_name);
+ hashmap_entry_init(&hent, hash);
- if (!p) {
+ /* Don't reopen a pack we already have. */
+ if (!hashmap_get(&data->r->objects->pack_map, &hent, pack_name)) {
p = add_packed_git(full_name, full_name_len, data->local);
if (p)
install_packed_git(data->r, p);
}
+ free(pack_name);
}
if (!report_garbage)
diff --git a/t/perf/p5303-many-packs.sh b/t/perf/p5303-many-packs.sh
index a369152c47..7ee791669a 100755
--- a/t/perf/p5303-many-packs.sh
+++ b/t/perf/p5303-many-packs.sh
@@ -85,4 +85,22 @@ do
'
done
+# Measure pack loading with 10,000 packs.
+test_expect_success 'generate lots of packs' '
+ for i in $(test_seq 10000); do
+ echo "blob"
+ echo "data <<EOF"
+ echo "blob $i"
+ echo "EOF"
+ echo "checkpoint"
+ done |
+ git -c fastimport.unpackLimit=0 fast-import
+'
+
+# The purpose of this test is to evaluate load time for a large number
+# of packs while doing as little other work as possible.
+test_perf "load 10,000 packs" '
+ git rev-parse --verify "HEAD^{commit}"
+'
+
test_done