diff options
author | Junio C Hamano <gitster@pobox.com> | 2021-07-28 13:17:57 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2021-07-28 13:17:57 -0700 |
commit | e5cc59c77c875aeda93a3cdfe70c8254164324ab (patch) | |
tree | 2299898410f357bfe17235877d70fdd1147c0ac7 | |
parent | Merge branch 'hj/commit-allow-empty-message' (diff) | |
parent | oidtree: a crit-bit tree for odb_loose_cache (diff) | |
download | tgif-e5cc59c77c875aeda93a3cdfe70c8254164324ab.tar.xz |
Merge branch 'ew/many-alternate-optim'
Optimization for repositories with many alternate object store.
* ew/many-alternate-optim:
oidtree: a crit-bit tree for odb_loose_cache
oidcpy_with_padding: constify `src' arg
make object_directory.loose_objects_subdir_seen a bitmap
avoid strlen via strbuf_addstr in link_alt_odb_entry
speed up alt_odb_usable() with many alternates
-rw-r--r-- | Makefile | 3 | ||||
-rw-r--r-- | cbtree.c | 167 | ||||
-rw-r--r-- | cbtree.h | 56 | ||||
-rw-r--r-- | dir.c | 10 | ||||
-rw-r--r-- | dir.h | 2 | ||||
-rw-r--r-- | hash.h | 2 | ||||
-rw-r--r-- | object-file.c | 75 | ||||
-rw-r--r-- | object-name.c | 28 | ||||
-rw-r--r-- | object-store.h | 14 | ||||
-rw-r--r-- | object.c | 2 | ||||
-rw-r--r-- | oidtree.c | 104 | ||||
-rw-r--r-- | oidtree.h | 22 | ||||
-rw-r--r-- | t/helper/test-oidtree.c | 49 | ||||
-rw-r--r-- | t/helper/test-tool.c | 1 | ||||
-rw-r--r-- | t/helper/test-tool.h | 1 | ||||
-rwxr-xr-x | t/t0069-oidtree.sh | 49 |
16 files changed, 534 insertions, 51 deletions
@@ -726,6 +726,7 @@ TEST_BUILTINS_OBJS += test-mergesort.o TEST_BUILTINS_OBJS += test-mktemp.o TEST_BUILTINS_OBJS += test-oid-array.o TEST_BUILTINS_OBJS += test-oidmap.o +TEST_BUILTINS_OBJS += test-oidtree.o TEST_BUILTINS_OBJS += test-online-cpus.o TEST_BUILTINS_OBJS += test-parse-options.o TEST_BUILTINS_OBJS += test-parse-pathspec-file.o @@ -850,6 +851,7 @@ LIB_OBJS += branch.o LIB_OBJS += bulk-checkin.o LIB_OBJS += bundle.o LIB_OBJS += cache-tree.o +LIB_OBJS += cbtree.o LIB_OBJS += chdir-notify.o LIB_OBJS += checkout.o LIB_OBJS += chunk-format.o @@ -945,6 +947,7 @@ LIB_OBJS += object.o LIB_OBJS += oid-array.o LIB_OBJS += oidmap.o LIB_OBJS += oidset.o +LIB_OBJS += oidtree.o LIB_OBJS += pack-bitmap-write.o LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-check.o diff --git a/cbtree.c b/cbtree.c new file mode 100644 index 0000000000..b0c65d810f --- /dev/null +++ b/cbtree.c @@ -0,0 +1,167 @@ +/* + * crit-bit tree implementation, does no allocations internally + * For more information on crit-bit trees: https://cr.yp.to/critbit.html + * Based on Adam Langley's adaptation of Dan Bernstein's public domain code + * git clone https://github.com/agl/critbit.git + */ +#include "cbtree.h" + +static struct cb_node *cb_node_of(const void *p) +{ + return (struct cb_node *)((uintptr_t)p - 1); +} + +/* locate the best match, does not do a final comparision */ +static struct cb_node *cb_internal_best_match(struct cb_node *p, + const uint8_t *k, size_t klen) +{ + while (1 & (uintptr_t)p) { + struct cb_node *q = cb_node_of(p); + uint8_t c = q->byte < klen ? k[q->byte] : 0; + size_t direction = (1 + (q->otherbits | c)) >> 8; + + p = q->child[direction]; + } + return p; +} + +/* returns NULL if successful, existing cb_node if duplicate */ +struct cb_node *cb_insert(struct cb_tree *t, struct cb_node *node, size_t klen) +{ + size_t newbyte, newotherbits; + uint8_t c; + int newdirection; + struct cb_node **wherep, *p; + + assert(!((uintptr_t)node & 1)); /* allocations must be aligned */ + + if (!t->root) { /* insert into empty tree */ + t->root = node; + return NULL; /* success */ + } + + /* see if a node already exists */ + p = cb_internal_best_match(t->root, node->k, klen); + + /* find first differing byte */ + for (newbyte = 0; newbyte < klen; newbyte++) { + if (p->k[newbyte] != node->k[newbyte]) + goto different_byte_found; + } + return p; /* element exists, let user deal with it */ + +different_byte_found: + newotherbits = p->k[newbyte] ^ node->k[newbyte]; + newotherbits |= newotherbits >> 1; + newotherbits |= newotherbits >> 2; + newotherbits |= newotherbits >> 4; + newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; + c = p->k[newbyte]; + newdirection = (1 + (newotherbits | c)) >> 8; + + node->byte = newbyte; + node->otherbits = newotherbits; + node->child[1 - newdirection] = node; + + /* find a place to insert it */ + wherep = &t->root; + for (;;) { + struct cb_node *q; + size_t direction; + + p = *wherep; + if (!(1 & (uintptr_t)p)) + break; + q = cb_node_of(p); + if (q->byte > newbyte) + break; + if (q->byte == newbyte && q->otherbits > newotherbits) + break; + c = q->byte < klen ? node->k[q->byte] : 0; + direction = (1 + (q->otherbits | c)) >> 8; + wherep = q->child + direction; + } + + node->child[newdirection] = *wherep; + *wherep = (struct cb_node *)(1 + (uintptr_t)node); + + return NULL; /* success */ +} + +struct cb_node *cb_lookup(struct cb_tree *t, const uint8_t *k, size_t klen) +{ + struct cb_node *p = cb_internal_best_match(t->root, k, klen); + + return p && !memcmp(p->k, k, klen) ? p : NULL; +} + +struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen) +{ + struct cb_node **wherep = &t->root; + struct cb_node **whereq = NULL; + struct cb_node *q = NULL; + size_t direction = 0; + uint8_t c; + struct cb_node *p = t->root; + + if (!p) return NULL; /* empty tree, nothing to delete */ + + /* traverse to find best match, keeping link to parent */ + while (1 & (uintptr_t)p) { + whereq = wherep; + q = cb_node_of(p); + c = q->byte < klen ? k[q->byte] : 0; + direction = (1 + (q->otherbits | c)) >> 8; + wherep = q->child + direction; + p = *wherep; + } + + if (memcmp(p->k, k, klen)) + return NULL; /* no match, nothing unlinked */ + + /* found an exact match */ + if (whereq) /* update parent */ + *whereq = q->child[1 - direction]; + else + t->root = NULL; + return p; +} + +static enum cb_next cb_descend(struct cb_node *p, cb_iter fn, void *arg) +{ + if (1 & (uintptr_t)p) { + struct cb_node *q = cb_node_of(p); + enum cb_next n = cb_descend(q->child[0], fn, arg); + + return n == CB_BREAK ? n : cb_descend(q->child[1], fn, arg); + } else { + return fn(p, arg); + } +} + +void cb_each(struct cb_tree *t, const uint8_t *kpfx, size_t klen, + cb_iter fn, void *arg) +{ + struct cb_node *p = t->root; + struct cb_node *top = p; + size_t i = 0; + + if (!p) return; /* empty tree */ + + /* Walk tree, maintaining top pointer */ + while (1 & (uintptr_t)p) { + struct cb_node *q = cb_node_of(p); + uint8_t c = q->byte < klen ? kpfx[q->byte] : 0; + size_t direction = (1 + (q->otherbits | c)) >> 8; + + p = q->child[direction]; + if (q->byte < klen) + top = p; + } + + for (i = 0; i < klen; i++) { + if (p->k[i] != kpfx[i]) + return; /* "best" match failed */ + } + cb_descend(top, fn, arg); +} diff --git a/cbtree.h b/cbtree.h new file mode 100644 index 0000000000..fe4587087e --- /dev/null +++ b/cbtree.h @@ -0,0 +1,56 @@ +/* + * crit-bit tree implementation, does no allocations internally + * For more information on crit-bit trees: https://cr.yp.to/critbit.html + * Based on Adam Langley's adaptation of Dan Bernstein's public domain code + * git clone https://github.com/agl/critbit.git + * + * This is adapted to store arbitrary data (not just NUL-terminated C strings + * and allocates no memory internally. The user needs to allocate + * "struct cb_node" and fill cb_node.k[] with arbitrary match data + * for memcmp. + * If "klen" is variable, then it should be embedded into "c_node.k[]" + * Recursion is bound by the maximum value of "klen" used. + */ +#ifndef CBTREE_H +#define CBTREE_H + +#include "git-compat-util.h" + +struct cb_node; +struct cb_node { + struct cb_node *child[2]; + /* + * n.b. uint32_t for `byte' is excessive for OIDs, + * we may consider shorter variants if nothing else gets stored. + */ + uint32_t byte; + uint8_t otherbits; + uint8_t k[FLEX_ARRAY]; /* arbitrary data */ +}; + +struct cb_tree { + struct cb_node *root; +}; + +enum cb_next { + CB_CONTINUE = 0, + CB_BREAK = 1 +}; + +#define CBTREE_INIT { .root = NULL } + +static inline void cb_init(struct cb_tree *t) +{ + t->root = NULL; +} + +struct cb_node *cb_lookup(struct cb_tree *, const uint8_t *k, size_t klen); +struct cb_node *cb_insert(struct cb_tree *, struct cb_node *, size_t klen); +struct cb_node *cb_unlink(struct cb_tree *t, const uint8_t *k, size_t klen); + +typedef enum cb_next (*cb_iter)(struct cb_node *, void *arg); + +void cb_each(struct cb_tree *, const uint8_t *kpfx, size_t klen, + cb_iter, void *arg); + +#endif /* CBTREE_H */ @@ -78,11 +78,21 @@ int fspathcmp(const char *a, const char *b) return ignore_case ? strcasecmp(a, b) : strcmp(a, b); } +int fspatheq(const char *a, const char *b) +{ + return !fspathcmp(a, b); +} + int fspathncmp(const char *a, const char *b, size_t count) { return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count); } +unsigned int fspathhash(const char *str) +{ + return ignore_case ? strihash(str) : strhash(str); +} + int git_fnmatch(const struct pathspec_item *item, const char *pattern, const char *string, int prefix) @@ -489,7 +489,9 @@ int remove_dir_recursively(struct strbuf *path, int flag); int remove_path(const char *path); int fspathcmp(const char *a, const char *b); +int fspatheq(const char *a, const char *b); int fspathncmp(const char *a, const char *b, size_t count); +unsigned int fspathhash(const char *str); /* * The prefix part of pattern must not contains wildcards. @@ -265,7 +265,7 @@ static inline void oidcpy(struct object_id *dst, const struct object_id *src) /* Like oidcpy() but zero-pads the unused bytes in dst's hash array. */ static inline void oidcpy_with_padding(struct object_id *dst, - struct object_id *src) + const struct object_id *src) { size_t hashsz; diff --git a/object-file.c b/object-file.c index ecca5a8da0..3d27dc8dea 100644 --- a/object-file.c +++ b/object-file.c @@ -517,9 +517,9 @@ const char *loose_object_path(struct repository *r, struct strbuf *buf, */ static int alt_odb_usable(struct raw_object_store *o, struct strbuf *path, - const char *normalized_objdir) + const char *normalized_objdir, khiter_t *pos) { - struct object_directory *odb; + int r; /* Detect cases where alternate disappeared */ if (!is_directory(path->buf)) { @@ -533,14 +533,20 @@ static int alt_odb_usable(struct raw_object_store *o, * Prevent the common mistake of listing the same * thing twice, or object directory itself. */ - for (odb = o->odb; odb; odb = odb->next) { - if (!fspathcmp(path->buf, odb->path)) - return 0; + if (!o->odb_by_path) { + khiter_t p; + + o->odb_by_path = kh_init_odb_path_map(); + assert(!o->odb->next); + p = kh_put_odb_path_map(o->odb_by_path, o->odb->path, &r); + assert(r == 1); /* never used */ + kh_value(o->odb_by_path, p) = o->odb; } - if (!fspathcmp(path->buf, normalized_objdir)) + if (fspatheq(path->buf, normalized_objdir)) return 0; - - return 1; + *pos = kh_put_odb_path_map(o->odb_by_path, path->buf, &r); + /* r: 0 = exists, 1 = never used, 2 = deleted */ + return r == 0 ? 0 : 1; } /* @@ -561,17 +567,18 @@ static int alt_odb_usable(struct raw_object_store *o, static void read_info_alternates(struct repository *r, const char *relative_base, int depth); -static int link_alt_odb_entry(struct repository *r, const char *entry, +static int link_alt_odb_entry(struct repository *r, const struct strbuf *entry, const char *relative_base, int depth, const char *normalized_objdir) { struct object_directory *ent; struct strbuf pathbuf = STRBUF_INIT; + khiter_t pos; - if (!is_absolute_path(entry) && relative_base) { + if (!is_absolute_path(entry->buf) && relative_base) { strbuf_realpath(&pathbuf, relative_base, 1); strbuf_addch(&pathbuf, '/'); } - strbuf_addstr(&pathbuf, entry); + strbuf_addbuf(&pathbuf, entry); if (strbuf_normalize_path(&pathbuf) < 0 && relative_base) { error(_("unable to normalize alternate object path: %s"), @@ -587,23 +594,25 @@ static int link_alt_odb_entry(struct repository *r, const char *entry, while (pathbuf.len && pathbuf.buf[pathbuf.len - 1] == '/') strbuf_setlen(&pathbuf, pathbuf.len - 1); - if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir)) { + if (!alt_odb_usable(r->objects, &pathbuf, normalized_objdir, &pos)) { strbuf_release(&pathbuf); return -1; } CALLOC_ARRAY(ent, 1); - ent->path = xstrdup(pathbuf.buf); + /* pathbuf.buf is already in r->objects->odb_by_path */ + ent->path = strbuf_detach(&pathbuf, NULL); /* add the alternate entry */ *r->objects->odb_tail = ent; r->objects->odb_tail = &(ent->next); ent->next = NULL; + assert(r->objects->odb_by_path); + kh_value(r->objects->odb_by_path, pos) = ent; /* recursively add alternates */ - read_info_alternates(r, pathbuf.buf, depth + 1); + read_info_alternates(r, ent->path, depth + 1); - strbuf_release(&pathbuf); return 0; } @@ -660,7 +669,7 @@ static void link_alt_odb_entries(struct repository *r, const char *alt, alt = parse_alt_odb_entry(alt, sep, &entry); if (!entry.len) continue; - link_alt_odb_entry(r, entry.buf, + link_alt_odb_entry(r, &entry, relative_base, depth, objdirbuf.buf); } strbuf_release(&entry); @@ -1178,7 +1187,7 @@ static int quick_has_loose(struct repository *r, prepare_alt_odb(r); for (odb = r->objects->odb; odb; odb = odb->next) { - if (oid_array_lookup(odb_loose_cache(odb, oid), oid) >= 0) + if (oidtree_contains(odb_loose_cache(odb, oid), oid)) return 1; } return 0; @@ -2454,39 +2463,45 @@ int for_each_loose_object(each_loose_object_fn cb, void *data, static int append_loose_object(const struct object_id *oid, const char *path, void *data) { - oid_array_append(data, oid); + oidtree_insert(data, oid); return 0; } -struct oid_array *odb_loose_cache(struct object_directory *odb, +struct oidtree *odb_loose_cache(struct object_directory *odb, const struct object_id *oid) { int subdir_nr = oid->hash[0]; struct strbuf buf = STRBUF_INIT; + size_t word_bits = bitsizeof(odb->loose_objects_subdir_seen[0]); + size_t word_index = subdir_nr / word_bits; + size_t mask = 1 << (subdir_nr % word_bits); + uint32_t *bitmap; if (subdir_nr < 0 || - subdir_nr >= ARRAY_SIZE(odb->loose_objects_subdir_seen)) + subdir_nr >= bitsizeof(odb->loose_objects_subdir_seen)) BUG("subdir_nr out of range"); - if (odb->loose_objects_subdir_seen[subdir_nr]) - return &odb->loose_objects_cache[subdir_nr]; - + bitmap = &odb->loose_objects_subdir_seen[word_index]; + if (*bitmap & mask) + return odb->loose_objects_cache; + if (!odb->loose_objects_cache) { + ALLOC_ARRAY(odb->loose_objects_cache, 1); + oidtree_init(odb->loose_objects_cache); + } strbuf_addstr(&buf, odb->path); for_each_file_in_obj_subdir(subdir_nr, &buf, append_loose_object, NULL, NULL, - &odb->loose_objects_cache[subdir_nr]); - odb->loose_objects_subdir_seen[subdir_nr] = 1; + odb->loose_objects_cache); + *bitmap |= mask; strbuf_release(&buf); - return &odb->loose_objects_cache[subdir_nr]; + return odb->loose_objects_cache; } void odb_clear_loose_cache(struct object_directory *odb) { - int i; - - for (i = 0; i < ARRAY_SIZE(odb->loose_objects_cache); i++) - oid_array_clear(&odb->loose_objects_cache[i]); + oidtree_clear(odb->loose_objects_cache); + FREE_AND_NULL(odb->loose_objects_cache); memset(&odb->loose_objects_subdir_seen, 0, sizeof(odb->loose_objects_subdir_seen)); } diff --git a/object-name.c b/object-name.c index 64202de60b..3263c19457 100644 --- a/object-name.c +++ b/object-name.c @@ -87,27 +87,21 @@ static void update_candidates(struct disambiguate_state *ds, const struct object static int match_hash(unsigned, const unsigned char *, const unsigned char *); +static enum cb_next match_prefix(const struct object_id *oid, void *arg) +{ + struct disambiguate_state *ds = arg; + /* no need to call match_hash, oidtree_each did prefix match */ + update_candidates(ds, oid); + return ds->ambiguous ? CB_BREAK : CB_CONTINUE; +} + static void find_short_object_filename(struct disambiguate_state *ds) { struct object_directory *odb; - for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next) { - int pos; - struct oid_array *loose_objects; - - loose_objects = odb_loose_cache(odb, &ds->bin_pfx); - pos = oid_array_lookup(loose_objects, &ds->bin_pfx); - if (pos < 0) - pos = -1 - pos; - while (!ds->ambiguous && pos < loose_objects->nr) { - const struct object_id *oid; - oid = loose_objects->oid + pos; - if (!match_hash(ds->len, ds->bin_pfx.hash, oid->hash)) - break; - update_candidates(ds, oid); - pos++; - } - } + for (odb = ds->repo->objects->odb; odb && !ds->ambiguous; odb = odb->next) + oidtree_each(odb_loose_cache(odb, &ds->bin_pfx), + &ds->bin_pfx, ds->len, match_prefix, ds); } static int match_hash(unsigned len, const unsigned char *a, const unsigned char *b) diff --git a/object-store.h b/object-store.h index ec32c23dcb..e679acc4c3 100644 --- a/object-store.h +++ b/object-store.h @@ -7,6 +7,9 @@ #include "oid-array.h" #include "strbuf.h" #include "thread-utils.h" +#include "khash.h" +#include "dir.h" +#include "oidtree.h" struct object_directory { struct object_directory *next; @@ -20,8 +23,8 @@ struct object_directory { * * Be sure to call odb_load_loose_cache() before using. */ - char loose_objects_subdir_seen[256]; - struct oid_array loose_objects_cache[256]; + uint32_t loose_objects_subdir_seen[8]; /* 256 bits */ + struct oidtree *loose_objects_cache; /* * Path to the alternative object store. If this is a relative path, @@ -30,6 +33,9 @@ struct object_directory { char *path; }; +KHASH_INIT(odb_path_map, const char * /* key: odb_path */, + struct object_directory *, 1, fspathhash, fspatheq); + void prepare_alt_odb(struct repository *r); char *compute_alternate_path(const char *path, struct strbuf *err); typedef int alt_odb_fn(struct object_directory *, void *); @@ -54,7 +60,7 @@ void add_to_alternates_memory(const char *dir); * Populate and return the loose object cache array corresponding to the * given object ID. */ -struct oid_array *odb_loose_cache(struct object_directory *odb, +struct oidtree *odb_loose_cache(struct object_directory *odb, const struct object_id *oid); /* Empty the loose object cache for the specified object directory. */ @@ -116,6 +122,8 @@ struct raw_object_store { */ struct object_directory *odb; struct object_directory **odb_tail; + kh_odb_path_map_t *odb_by_path; + int loaded_alternates; /* @@ -511,6 +511,8 @@ static void free_object_directories(struct raw_object_store *o) free_object_directory(o->odb); o->odb = next; } + kh_destroy_odb_path_map(o->odb_by_path); + o->odb_by_path = NULL; } void raw_object_store_clear(struct raw_object_store *o) diff --git a/oidtree.c b/oidtree.c new file mode 100644 index 0000000000..7eb0e9ba05 --- /dev/null +++ b/oidtree.c @@ -0,0 +1,104 @@ +/* + * A wrapper around cbtree which stores oids + * May be used to replace oid-array for prefix (abbreviation) matches + */ +#include "oidtree.h" +#include "alloc.h" +#include "hash.h" + +struct oidtree_node { + /* n.k[] is used to store "struct object_id" */ + struct cb_node n; +}; + +struct oidtree_iter_data { + oidtree_iter fn; + void *arg; + size_t *last_nibble_at; + int algo; + uint8_t last_byte; +}; + +void oidtree_init(struct oidtree *ot) +{ + cb_init(&ot->tree); + mem_pool_init(&ot->mem_pool, 0); +} + +void oidtree_clear(struct oidtree *ot) +{ + if (ot) { + mem_pool_discard(&ot->mem_pool, 0); + oidtree_init(ot); + } +} + +void oidtree_insert(struct oidtree *ot, const struct object_id *oid) +{ + struct oidtree_node *on; + + if (!oid->algo) + BUG("oidtree_insert requires oid->algo"); + + on = mem_pool_alloc(&ot->mem_pool, sizeof(*on) + sizeof(*oid)); + oidcpy_with_padding((struct object_id *)on->n.k, oid); + + /* + * n.b. Current callers won't get us duplicates, here. If a + * future caller causes duplicates, there'll be a a small leak + * that won't be freed until oidtree_clear. Currently it's not + * worth maintaining a free list + */ + cb_insert(&ot->tree, &on->n, sizeof(*oid)); +} + + +int oidtree_contains(struct oidtree *ot, const struct object_id *oid) +{ + struct object_id k; + size_t klen = sizeof(k); + + oidcpy_with_padding(&k, oid); + + if (oid->algo == GIT_HASH_UNKNOWN) + klen -= sizeof(oid->algo); + + /* cb_lookup relies on memcmp on the struct, so order matters: */ + klen += BUILD_ASSERT_OR_ZERO(offsetof(struct object_id, hash) < + offsetof(struct object_id, algo)); + + return cb_lookup(&ot->tree, (const uint8_t *)&k, klen) ? 1 : 0; +} + +static enum cb_next iter(struct cb_node *n, void *arg) +{ + struct oidtree_iter_data *x = arg; + const struct object_id *oid = (const struct object_id *)n->k; + + if (x->algo != GIT_HASH_UNKNOWN && x->algo != oid->algo) + return CB_CONTINUE; + + if (x->last_nibble_at) { + if ((oid->hash[*x->last_nibble_at] ^ x->last_byte) & 0xf0) + return CB_CONTINUE; + } + + return x->fn(oid, x->arg); +} + +void oidtree_each(struct oidtree *ot, const struct object_id *oid, + size_t oidhexsz, oidtree_iter fn, void *arg) +{ + size_t klen = oidhexsz / 2; + struct oidtree_iter_data x = { 0 }; + assert(oidhexsz <= GIT_MAX_HEXSZ); + + x.fn = fn; + x.arg = arg; + x.algo = oid->algo; + if (oidhexsz & 1) { + x.last_byte = oid->hash[klen]; + x.last_nibble_at = &klen; + } + cb_each(&ot->tree, (const uint8_t *)oid, klen, iter, &x); +} diff --git a/oidtree.h b/oidtree.h new file mode 100644 index 0000000000..77898f510a --- /dev/null +++ b/oidtree.h @@ -0,0 +1,22 @@ +#ifndef OIDTREE_H +#define OIDTREE_H + +#include "cbtree.h" +#include "hash.h" +#include "mem-pool.h" + +struct oidtree { + struct cb_tree tree; + struct mem_pool mem_pool; +}; + +void oidtree_init(struct oidtree *); +void oidtree_clear(struct oidtree *); +void oidtree_insert(struct oidtree *, const struct object_id *); +int oidtree_contains(struct oidtree *, const struct object_id *); + +typedef enum cb_next (*oidtree_iter)(const struct object_id *, void *data); +void oidtree_each(struct oidtree *, const struct object_id *, + size_t oidhexsz, oidtree_iter, void *data); + +#endif /* OIDTREE_H */ diff --git a/t/helper/test-oidtree.c b/t/helper/test-oidtree.c new file mode 100644 index 0000000000..180ee28dd9 --- /dev/null +++ b/t/helper/test-oidtree.c @@ -0,0 +1,49 @@ +#include "test-tool.h" +#include "cache.h" +#include "oidtree.h" + +static enum cb_next print_oid(const struct object_id *oid, void *data) +{ + puts(oid_to_hex(oid)); + return CB_CONTINUE; +} + +int cmd__oidtree(int argc, const char **argv) +{ + struct oidtree ot; + struct strbuf line = STRBUF_INIT; + int nongit_ok; + int algo = GIT_HASH_UNKNOWN; + + oidtree_init(&ot); + setup_git_directory_gently(&nongit_ok); + + while (strbuf_getline(&line, stdin) != EOF) { + const char *arg; + struct object_id oid; + + if (skip_prefix(line.buf, "insert ", &arg)) { + if (get_oid_hex_any(arg, &oid) == GIT_HASH_UNKNOWN) + die("insert not a hexadecimal oid: %s", arg); + algo = oid.algo; + oidtree_insert(&ot, &oid); + } else if (skip_prefix(line.buf, "contains ", &arg)) { + if (get_oid_hex(arg, &oid)) + die("contains not a hexadecimal oid: %s", arg); + printf("%d\n", oidtree_contains(&ot, &oid)); + } else if (skip_prefix(line.buf, "each ", &arg)) { + char buf[GIT_MAX_HEXSZ + 1] = { '0' }; + memset(&oid, 0, sizeof(oid)); + memcpy(buf, arg, strlen(arg)); + buf[hash_algos[algo].hexsz] = '\0'; + get_oid_hex_any(buf, &oid); + oid.algo = algo; + oidtree_each(&ot, &oid, strlen(arg), print_oid, NULL); + } else if (!strcmp(line.buf, "clear")) { + oidtree_clear(&ot); + } else { + die("unknown command: %s", line.buf); + } + } + return 0; +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index b21e8f1519..490ac026c5 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -43,6 +43,7 @@ static struct test_cmd cmds[] = { { "mktemp", cmd__mktemp }, { "oid-array", cmd__oid_array }, { "oidmap", cmd__oidmap }, + { "oidtree", cmd__oidtree }, { "online-cpus", cmd__online_cpus }, { "parse-options", cmd__parse_options }, { "parse-pathspec-file", cmd__parse_pathspec_file }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index f845ced4b3..f8dc266721 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -32,6 +32,7 @@ int cmd__match_trees(int argc, const char **argv); int cmd__mergesort(int argc, const char **argv); int cmd__mktemp(int argc, const char **argv); int cmd__oidmap(int argc, const char **argv); +int cmd__oidtree(int argc, const char **argv); int cmd__online_cpus(int argc, const char **argv); int cmd__parse_options(int argc, const char **argv); int cmd__parse_pathspec_file(int argc, const char** argv); diff --git a/t/t0069-oidtree.sh b/t/t0069-oidtree.sh new file mode 100755 index 0000000000..bfb1397d7b --- /dev/null +++ b/t/t0069-oidtree.sh @@ -0,0 +1,49 @@ +#!/bin/sh + +test_description='basic tests for the oidtree implementation' +. ./test-lib.sh + +maxhexsz=$(test_oid hexsz) +echoid () { + prefix="${1:+$1 }" + shift + while test $# -gt 0 + do + shortoid="$1" + shift + difference=$(($maxhexsz - ${#shortoid})) + printf "%s%s%0${difference}d\\n" "$prefix" "$shortoid" "0" + done +} + +test_expect_success 'oidtree insert and contains' ' + cat >expect <<-\EOF && + 0 + 0 + 0 + 1 + 1 + 0 + EOF + { + echoid insert 444 1 2 3 4 5 a b c d e && + echoid contains 44 441 440 444 4440 4444 + echo clear + } | test-tool oidtree >actual && + test_cmp expect actual +' + +test_expect_success 'oidtree each' ' + echoid "" 123 321 321 >expect && + { + echoid insert f 9 8 123 321 a b c d e + echo each 12300 + echo each 3211 + echo each 3210 + echo each 32100 + echo clear + } | test-tool oidtree >actual && + test_cmp expect actual +' + +test_done |