From 8b2f8cbcb16b1a9775214fe1d69aeb1580ae179d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 4 Oct 2018 17:13:06 +0200 Subject: oidset: use khash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reimplement oidset using khash.h in order to reduce its memory footprint and make it faster. Performance of a command that mainly checks for duplicate objects using an oidset, with master and Clang 6.0.1: $ cmd="./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)'" $ /usr/bin/time $cmd >/dev/null 0.22user 0.03system 0:00.25elapsed 99%CPU (0avgtext+0avgdata 48484maxresident)k 0inputs+0outputs (0major+11204minor)pagefaults 0swaps $ hyperfine "$cmd" Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)' Time (mean ± σ): 250.0 ms ± 6.0 ms [User: 225.9 ms, System: 23.6 ms] Range (min … max): 242.0 ms … 261.1 ms And with this patch: $ /usr/bin/time $cmd >/dev/null 0.14user 0.00system 0:00.15elapsed 100%CPU (0avgtext+0avgdata 41396maxresident)k 0inputs+0outputs (0major+8318minor)pagefaults 0swaps $ hyperfine "$cmd" Benchmark #1: ./git-cat-file --batch-all-objects --unordered --buffer --batch-check='%(objectname)' Time (mean ± σ): 151.9 ms ± 4.9 ms [User: 130.5 ms, System: 21.2 ms] Range (min … max): 148.2 ms … 170.4 ms Initial-patch-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- oidset.h | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) (limited to 'oidset.h') diff --git a/oidset.h b/oidset.h index 40ec5f87fe..4b90540cd4 100644 --- a/oidset.h +++ b/oidset.h @@ -1,7 +1,8 @@ #ifndef OIDSET_H #define OIDSET_H -#include "oidmap.h" +#include "hashmap.h" +#include "khash.h" /** * This API is similar to sha1-array, in that it maintains a set of object ids @@ -15,19 +16,33 @@ * table overhead. */ +static inline unsigned int oid_hash(struct object_id oid) +{ + return sha1hash(oid.hash); +} + +static inline int oid_equal(struct object_id a, struct object_id b) +{ + return oideq(&a, &b); +} + +KHASH_INIT(oid, struct object_id, int, 0, oid_hash, oid_equal) + /** * A single oidset; should be zero-initialized (or use OIDSET_INIT). */ struct oidset { - struct oidmap map; + kh_oid_t set; }; -#define OIDSET_INIT { OIDMAP_INIT } +#define OIDSET_INIT { { 0 } } static inline void oidset_init(struct oidset *set, size_t initial_size) { - oidmap_init(&set->map, initial_size); + memset(&set->set, 0, sizeof(set->set)); + if (initial_size) + kh_resize_oid(&set->set, initial_size); } /** @@ -58,19 +73,24 @@ int oidset_remove(struct oidset *set, const struct object_id *oid); void oidset_clear(struct oidset *set); struct oidset_iter { - struct oidmap_iter m_iter; + kh_oid_t *set; + khiter_t iter; }; static inline void oidset_iter_init(struct oidset *set, struct oidset_iter *iter) { - oidmap_iter_init(&set->map, &iter->m_iter); + iter->set = &set->set; + iter->iter = kh_begin(iter->set); } static inline struct object_id *oidset_iter_next(struct oidset_iter *iter) { - struct oidmap_entry *e = oidmap_iter_next(&iter->m_iter); - return e ? &e->oid : NULL; + for (; iter->iter != kh_end(iter->set); iter->iter++) { + if (kh_exist(iter->set, iter->iter)) + return &kh_key(iter->set, iter->iter++); + } + return NULL; } static inline struct object_id *oidset_iter_first(struct oidset *set, -- cgit v1.2.3 From 8c84ae659e0e17d55f5ddc58bc79855ed7650e00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 4 Oct 2018 17:14:37 +0200 Subject: oidset: uninline oidset_init() There is no need to inline oidset_init(), as it's typically only called twice in the lifetime of an oidset (once at the beginning and at the end by oidset_clear()) and kh_resize_* is quite big, so move its definition to oidset.c. Document it while we're at it. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- oidset.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'oidset.h') diff --git a/oidset.h b/oidset.h index 4b90540cd4..c9d0f6d3cc 100644 --- a/oidset.h +++ b/oidset.h @@ -38,12 +38,13 @@ struct oidset { #define OIDSET_INIT { { 0 } } -static inline void oidset_init(struct oidset *set, size_t initial_size) -{ - memset(&set->set, 0, sizeof(set->set)); - if (initial_size) - kh_resize_oid(&set->set, initial_size); -} +/** + * Initialize the oidset structure `set`. + * + * If `initial_size` is bigger than 0 then preallocate to allow inserting + * the specified number of elements without further allocations. + */ +void oidset_init(struct oidset *set, size_t initial_size); /** * Returns true iff `set` contains `oid`. -- cgit v1.2.3