diff options
author | Junio C Hamano <gitster@pobox.com> | 2016-09-08 21:49:46 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-09-08 21:49:46 -0700 |
commit | c4071eace94a7dca1cc0c0702fc0d4f23d2fc510 (patch) | |
tree | d394c5a3eed30d33092ca31c7a28c015af698776 | |
parent | Git 2.10 (diff) | |
parent | t/perf: add basic perf tests for delta base cache (diff) | |
download | tgif-c4071eace94a7dca1cc0c0702fc0d4f23d2fc510.tar.xz |
Merge branch 'jk/delta-base-cache'
The delta-base-cache mechanism has been key to performance in
a repository with a tightly packed packfile, but it did not scale
well, even with a larger value of core.deltaBaseCacheLimit.
* jk/delta-base-cache:
t/perf: add basic perf tests for delta base cache
delta_base_cache: use hashmap.h
delta_base_cache: drop special treatment of blobs
delta_base_cache: use list.h for LRU
release_delta_base_cache: reuse existing detach function
clear_delta_base_cache_entry: use a more descriptive name
cache_or_unpack_entry: drop keep_cache parameter
-rw-r--r-- | sha1_file.c | 153 |
-rwxr-xr-x | t/perf/p0003-delta-base-cache.sh | 31 |
2 files changed, 111 insertions, 73 deletions
diff --git a/sha1_file.c b/sha1_file.c index 3045aeabda..a57b71d133 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -24,6 +24,7 @@ #include "streaming.h" #include "dir.h" #include "mru.h" +#include "list.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -2073,136 +2074,142 @@ static void *unpack_compressed_entry(struct packed_git *p, return buffer; } -#define MAX_DELTA_CACHE (256) - +static struct hashmap delta_base_cache; static size_t delta_base_cached; -static struct delta_base_cache_lru_list { - struct delta_base_cache_lru_list *prev; - struct delta_base_cache_lru_list *next; -} delta_base_cache_lru = { &delta_base_cache_lru, &delta_base_cache_lru }; +static LIST_HEAD(delta_base_cache_lru); -static struct delta_base_cache_entry { - struct delta_base_cache_lru_list lru; - void *data; +struct delta_base_cache_key { struct packed_git *p; off_t base_offset; +}; + +struct delta_base_cache_entry { + struct hashmap hash; + struct delta_base_cache_key key; + struct list_head lru; + void *data; unsigned long size; enum object_type type; -} delta_base_cache[MAX_DELTA_CACHE]; +}; -static unsigned long pack_entry_hash(struct packed_git *p, off_t base_offset) +static unsigned int pack_entry_hash(struct packed_git *p, off_t base_offset) { - unsigned long hash; + unsigned int hash; - hash = (unsigned long)(intptr_t)p + (unsigned long)base_offset; + hash = (unsigned int)(intptr_t)p + (unsigned int)base_offset; hash += (hash >> 8) + (hash >> 16); - return hash % MAX_DELTA_CACHE; + return hash; } static struct delta_base_cache_entry * get_delta_base_cache_entry(struct packed_git *p, off_t base_offset) { - unsigned long hash = pack_entry_hash(p, base_offset); - return delta_base_cache + hash; + struct hashmap_entry entry; + struct delta_base_cache_key key; + + if (!delta_base_cache.cmpfn) + return NULL; + + hashmap_entry_init(&entry, pack_entry_hash(p, base_offset)); + key.p = p; + key.base_offset = base_offset; + return 
hashmap_get(&delta_base_cache, &entry, &key); +} + +static int delta_base_cache_key_eq(const struct delta_base_cache_key *a, + const struct delta_base_cache_key *b) +{ + return a->p == b->p && a->base_offset == b->base_offset; } -static int eq_delta_base_cache_entry(struct delta_base_cache_entry *ent, - struct packed_git *p, off_t base_offset) +static int delta_base_cache_hash_cmp(const void *va, const void *vb, + const void *vkey) { - return (ent->data && ent->p == p && ent->base_offset == base_offset); + const struct delta_base_cache_entry *a = va, *b = vb; + const struct delta_base_cache_key *key = vkey; + if (key) + return !delta_base_cache_key_eq(&a->key, key); + else + return !delta_base_cache_key_eq(&a->key, &b->key); } static int in_delta_base_cache(struct packed_git *p, off_t base_offset) { - struct delta_base_cache_entry *ent; - ent = get_delta_base_cache_entry(p, base_offset); - return eq_delta_base_cache_entry(ent, p, base_offset); + return !!get_delta_base_cache_entry(p, base_offset); } -static void clear_delta_base_cache_entry(struct delta_base_cache_entry *ent) +/* + * Remove the entry from the cache, but do _not_ free the associated + * entry data. The caller takes ownership of the "data" buffer, and + * should copy out any fields it wants before detaching. 
+ */ +static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent) { - ent->data = NULL; - ent->lru.next->prev = ent->lru.prev; - ent->lru.prev->next = ent->lru.next; + hashmap_remove(&delta_base_cache, ent, &ent->key); + list_del(&ent->lru); delta_base_cached -= ent->size; + free(ent); } static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, - unsigned long *base_size, enum object_type *type, int keep_cache) + unsigned long *base_size, enum object_type *type) { struct delta_base_cache_entry *ent; - void *ret; ent = get_delta_base_cache_entry(p, base_offset); - - if (!eq_delta_base_cache_entry(ent, p, base_offset)) + if (!ent) return unpack_entry(p, base_offset, type, base_size); - ret = ent->data; - - if (!keep_cache) - clear_delta_base_cache_entry(ent); - else - ret = xmemdupz(ent->data, ent->size); *type = ent->type; *base_size = ent->size; - return ret; + return xmemdupz(ent->data, ent->size); } static inline void release_delta_base_cache(struct delta_base_cache_entry *ent) { - if (ent->data) { - free(ent->data); - ent->data = NULL; - ent->lru.next->prev = ent->lru.prev; - ent->lru.prev->next = ent->lru.next; - delta_base_cached -= ent->size; - } + free(ent->data); + detach_delta_base_cache_entry(ent); } void clear_delta_base_cache(void) { - unsigned long p; - for (p = 0; p < MAX_DELTA_CACHE; p++) - release_delta_base_cache(&delta_base_cache[p]); + struct hashmap_iter iter; + struct delta_base_cache_entry *entry; + for (entry = hashmap_iter_first(&delta_base_cache, &iter); + entry; + entry = hashmap_iter_next(&iter)) { + release_delta_base_cache(entry); + } } static void add_delta_base_cache(struct packed_git *p, off_t base_offset, void *base, unsigned long base_size, enum object_type type) { - unsigned long hash = pack_entry_hash(p, base_offset); - struct delta_base_cache_entry *ent = delta_base_cache + hash; - struct delta_base_cache_lru_list *lru; + struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent)); + struct 
list_head *lru; - release_delta_base_cache(ent); delta_base_cached += base_size; - for (lru = delta_base_cache_lru.next; - delta_base_cached > delta_base_cache_limit - && lru != &delta_base_cache_lru; - lru = lru->next) { - struct delta_base_cache_entry *f = (void *)lru; - if (f->type == OBJ_BLOB) - release_delta_base_cache(f); - } - for (lru = delta_base_cache_lru.next; - delta_base_cached > delta_base_cache_limit - && lru != &delta_base_cache_lru; - lru = lru->next) { - struct delta_base_cache_entry *f = (void *)lru; + list_for_each(lru, &delta_base_cache_lru) { + struct delta_base_cache_entry *f = + list_entry(lru, struct delta_base_cache_entry, lru); + if (delta_base_cached <= delta_base_cache_limit) + break; release_delta_base_cache(f); } - ent->p = p; - ent->base_offset = base_offset; + ent->key.p = p; + ent->key.base_offset = base_offset; ent->type = type; ent->data = base; ent->size = base_size; - ent->lru.next = &delta_base_cache_lru; - ent->lru.prev = delta_base_cache_lru.prev; - delta_base_cache_lru.prev->next = &ent->lru; - delta_base_cache_lru.prev = &ent->lru; + list_add_tail(&ent->lru, &delta_base_cache_lru); + + if (!delta_base_cache.cmpfn) + hashmap_init(&delta_base_cache, delta_base_cache_hash_cmp, 0); + hashmap_entry_init(ent, pack_entry_hash(p, base_offset)); + hashmap_add(&delta_base_cache, ent); } static void *read_object(const unsigned char *sha1, enum object_type *type, @@ -2246,11 +2253,11 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, struct delta_base_cache_entry *ent; ent = get_delta_base_cache_entry(p, curpos); - if (eq_delta_base_cache_entry(ent, p, curpos)) { + if (ent) { type = ent->type; data = ent->data; size = ent->size; - clear_delta_base_cache_entry(ent); + detach_delta_base_cache_entry(ent); base_from_cache = 1; break; } @@ -2755,7 +2762,7 @@ static void *read_packed_sha1(const unsigned char *sha1, if (!find_pack_entry(sha1, &e)) return NULL; - data = cache_or_unpack_entry(e.p, e.offset, size, type, 1); + data = 
cache_or_unpack_entry(e.p, e.offset, size, type); if (!data) { /* * We're probably in deep shit, but let's try to fetch diff --git a/t/perf/p0003-delta-base-cache.sh b/t/perf/p0003-delta-base-cache.sh new file mode 100755 index 0000000000..62369eaaf0 --- /dev/null +++ b/t/perf/p0003-delta-base-cache.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +test_description='Test operations that emphasize the delta base cache. + +We look at both "log --raw", which should put only trees into the delta cache, +and "log -Sfoo --raw", which should look at both trees and blobs. + +Any effects will be emphasized if the test repository is fully packed (loose +objects obviously do not use the delta base cache at all). It is also +emphasized if the pack has long delta chains (e.g., as produced by "gc +--aggressive"), though cache is still quite noticeable even with the default +depth of 50. + +The setting of core.deltaBaseCacheLimit in the source repository is also +relevant (depending on the size of your test repo), so be sure it is consistent +between runs. +' +. ./perf-lib.sh + +test_perf_large_repo + +# puts mostly trees into the delta base cache +test_perf 'log --raw' ' + git log --raw >/dev/null +' + +test_perf 'log -S' ' + git log --raw -Sfoo >/dev/null +' + +test_done |