diff options
-rw-r--r-- | Makefile | 1 |
-rw-r--r-- | builtin/receive-pack.c | 41 |
-rw-r--r-- | fetch-pack.c | 48 |
-rw-r--r-- | object.h | 2 |
-rw-r--r-- | oidset.c | 49 |
-rw-r--r-- | oidset.h | 45 |
-rwxr-xr-x | t/t5400-send-pack.sh | 38 |
-rw-r--r-- | transport.c | 72 |
-rw-r--r-- | transport.h | 2 |
9 files changed, 249 insertions, 49 deletions
@@ -781,6 +781,7 @@ LIB_OBJS += notes-cache.o LIB_OBJS += notes-merge.o LIB_OBJS += notes-utils.o LIB_OBJS += object.o +LIB_OBJS += oidset.o LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-bitmap-write.o LIB_OBJS += pack-check.o diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 1dbb8a0692..9ed8fbbfad 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -21,6 +21,7 @@ #include "sigchain.h" #include "fsck.h" #include "tmp-objdir.h" +#include "oidset.h" static const char * const receive_pack_usage[] = { N_("git receive-pack <git-dir>"), @@ -250,8 +251,9 @@ static void show_ref(const char *path, const unsigned char *sha1) } static int show_ref_cb(const char *path_full, const struct object_id *oid, - int flag, void *unused) + int flag, void *data) { + struct oidset *seen = data; const char *path = strip_namespace(path_full); if (ref_is_hidden(path, path_full)) @@ -260,37 +262,38 @@ static int show_ref_cb(const char *path_full, const struct object_id *oid, /* * Advertise refs outside our current namespace as ".have" * refs, so that the client can use them to minimize data - * transfer but will otherwise ignore them. This happens to - * cover ".have" that are thrown in by add_one_alternate_ref() - * to mark histories that are complete in our alternates as - * well. + * transfer but will otherwise ignore them. 
*/ - if (!path) + if (!path) { + if (oidset_insert(seen, oid)) + return 0; path = ".have"; + } else { + oidset_insert(seen, oid); + } show_ref(path, oid->hash); return 0; } -static int show_one_alternate_sha1(const unsigned char sha1[20], void *unused) +static void show_one_alternate_ref(const char *refname, + const struct object_id *oid, + void *data) { - show_ref(".have", sha1); - return 0; -} + struct oidset *seen = data; -static void collect_one_alternate_ref(const struct ref *ref, void *data) -{ - struct sha1_array *sa = data; - sha1_array_append(sa, ref->old_oid.hash); + if (oidset_insert(seen, oid)) + return; + + show_ref(".have", oid->hash); } static void write_head_info(void) { - struct sha1_array sa = SHA1_ARRAY_INIT; + static struct oidset seen = OIDSET_INIT; - for_each_alternate_ref(collect_one_alternate_ref, &sa); - sha1_array_for_each_unique(&sa, show_one_alternate_sha1, NULL); - sha1_array_clear(&sa); - for_each_ref(show_ref_cb, NULL); + for_each_ref(show_ref_cb, &seen); + for_each_alternate_ref(show_one_alternate_ref, &seen); + oidset_clear(&seen); if (!sent_capabilities) show_ref("capabilities^{}", null_sha1); diff --git a/fetch-pack.c b/fetch-pack.c index 601f0779a1..e0f5d5ce87 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -35,6 +35,7 @@ static const char *alternate_shallow_file; #define COMMON_REF (1U << 2) #define SEEN (1U << 3) #define POPPED (1U << 4) +#define ALTERNATE (1U << 5) static int marked; @@ -67,6 +68,41 @@ static inline void print_verbose(const struct fetch_pack_args *args, fputc('\n', stderr); } +struct alternate_object_cache { + struct object **items; + size_t nr, alloc; +}; + +static void cache_one_alternate(const char *refname, + const struct object_id *oid, + void *vcache) +{ + struct alternate_object_cache *cache = vcache; + struct object *obj = parse_object(oid->hash); + + if (!obj || (obj->flags & ALTERNATE)) + return; + + obj->flags |= ALTERNATE; + ALLOC_GROW(cache->items, cache->nr + 1, cache->alloc); + 
cache->items[cache->nr++] = obj; +} + +static void for_each_cached_alternate(void (*cb)(struct object *)) +{ + static int initialized; + static struct alternate_object_cache cache; + size_t i; + + if (!initialized) { + for_each_alternate_ref(cache_one_alternate, &cache); + initialized = 1; + } + + for (i = 0; i < cache.nr; i++) + cb(cache.items[i]); +} + static void rev_list_push(struct commit *commit, int mark) { if (!(commit->object.flags & mark)) { @@ -253,9 +289,9 @@ static void send_request(struct fetch_pack_args *args, write_or_die(fd, buf->buf, buf->len); } -static void insert_one_alternate_ref(const struct ref *ref, void *unused) +static void insert_one_alternate_object(struct object *obj) { - rev_list_insert_ref(NULL, ref->old_oid.hash); + rev_list_insert_ref(NULL, obj->oid.hash); } #define INITIAL_FLUSH 16 @@ -298,7 +334,7 @@ static int find_common(struct fetch_pack_args *args, marked = 1; for_each_ref(rev_list_insert_ref_oid, NULL); - for_each_alternate_ref(insert_one_alternate_ref, NULL); + for_each_cached_alternate(insert_one_alternate_object); fetching = 0; for ( ; refs ; refs = refs->next) { @@ -619,9 +655,9 @@ static void filter_refs(struct fetch_pack_args *args, *refs = newlist; } -static void mark_alternate_complete(const struct ref *ref, void *unused) +static void mark_alternate_complete(struct object *obj) { - mark_complete(ref->old_oid.hash); + mark_complete(obj->oid.hash); } static int everything_local(struct fetch_pack_args *args, @@ -657,7 +693,7 @@ static int everything_local(struct fetch_pack_args *args, if (!args->deepen) { for_each_ref(mark_complete_oid, NULL); - for_each_alternate_ref(mark_alternate_complete, NULL); + for_each_cached_alternate(mark_alternate_complete); commit_list_sort_by_date(&complete); if (cutoff) mark_recent_complete_commits(args, cutoff); @@ -29,7 +29,7 @@ struct object_array { /* * object flag allocation: * revision.h: 0---------10 26 - * fetch-pack.c: 0---4 + * fetch-pack.c: 0---5 * walker.c: 0-2 * upload-pack.c: 
4 11----------------19 * builtin/blame.c: 12-13 diff --git a/oidset.c b/oidset.c new file mode 100644 index 0000000000..ac169f05d3 --- /dev/null +++ b/oidset.c @@ -0,0 +1,49 @@ +#include "cache.h" +#include "oidset.h" + +struct oidset_entry { + struct hashmap_entry hash; + struct object_id oid; +}; + +static int oidset_hashcmp(const void *va, const void *vb, + const void *vkey) +{ + const struct oidset_entry *a = va, *b = vb; + const struct object_id *key = vkey; + return oidcmp(&a->oid, key ? key : &b->oid); +} + +int oidset_contains(const struct oidset *set, const struct object_id *oid) +{ + struct hashmap_entry key; + + if (!set->map.cmpfn) + return 0; + + hashmap_entry_init(&key, sha1hash(oid->hash)); + return !!hashmap_get(&set->map, &key, oid); +} + +int oidset_insert(struct oidset *set, const struct object_id *oid) +{ + struct oidset_entry *entry; + + if (!set->map.cmpfn) + hashmap_init(&set->map, oidset_hashcmp, 0); + + if (oidset_contains(set, oid)) + return 1; + + entry = xmalloc(sizeof(*entry)); + hashmap_entry_init(&entry->hash, sha1hash(oid->hash)); + oidcpy(&entry->oid, oid); + + hashmap_add(&set->map, entry); + return 0; +} + +void oidset_clear(struct oidset *set) +{ + hashmap_free(&set->map, 1); +} diff --git a/oidset.h b/oidset.h new file mode 100644 index 0000000000..b7eaab5b88 --- /dev/null +++ b/oidset.h @@ -0,0 +1,45 @@ +#ifndef OIDSET_H +#define OIDSET_H + +/** + * This API is similar to sha1-array, in that it maintains a set of object ids + * in a memory-efficient way. The major differences are: + * + * 1. It uses a hash, so we can do online duplicate removal, rather than + * sort-and-uniq at the end. This can reduce memory footprint if you have + * a large list of oids with many duplicates. + * + * 2. The per-unique-oid memory footprint is slightly higher due to hash + * table overhead. + */ + +/** + * A single oidset; should be zero-initialized (or use OIDSET_INIT). 
+ */ +struct oidset { + struct hashmap map; +}; + +#define OIDSET_INIT { { NULL } } + +/** + * Returns true iff `set` contains `oid`. + */ +int oidset_contains(const struct oidset *set, const struct object_id *oid); + +/** + * Insert the oid into the set; a copy is made, so "oid" does not need + * to persist after this function is called. + * + * Returns 1 if the oid was already in the set, 0 otherwise. This can be used + * to perform an efficient check-and-add. + */ +int oidset_insert(struct oidset *set, const struct object_id *oid); + +/** + * Remove all entries from the oidset, freeing any resources associated with + * it. + */ +void oidset_clear(struct oidset *set); + +#endif /* OIDSET_H */ diff --git a/t/t5400-send-pack.sh b/t/t5400-send-pack.sh index 305ca7a930..3331e0f534 100755 --- a/t/t5400-send-pack.sh +++ b/t/t5400-send-pack.sh @@ -255,4 +255,42 @@ test_expect_success 'deny pushing to delete current branch' ' ) ' +extract_ref_advertisement () { + perl -lne ' + # \\ is there to skip capabilities after \0 + /push< ([^\\]+)/ or next; + exit 0 if $1 eq "0000"; + print $1; + ' +} + +test_expect_success 'receive-pack de-dupes .have lines' ' + git init shared && + git -C shared commit --allow-empty -m both && + git clone -s shared fork && + ( + cd shared && + git checkout -b only-shared && + git commit --allow-empty -m only-shared && + git update-ref refs/heads/foo HEAD + ) && + + # Notable things in this expectation: + # - local refs are not de-duped + # - .have does not duplicate locals + # - .have does not duplicate itself + local=$(git -C fork rev-parse HEAD) && + shared=$(git -C shared rev-parse only-shared) && + cat >expect <<-EOF && + $local refs/heads/master + $local refs/remotes/origin/HEAD + $local refs/remotes/origin/master + $shared .have + EOF + + GIT_TRACE_PACKET=$(pwd)/trace git push fork HEAD:foo && + extract_ref_advertisement <trace >refs && + test_cmp expect refs +' + test_done diff --git a/transport.c b/transport.c index 
d72e089484..48864b3a9e 100644 --- a/transport.c +++ b/transport.c @@ -1206,6 +1206,42 @@ literal_copy: return xstrdup(url); } +static void read_alternate_refs(const char *path, + alternate_ref_fn *cb, + void *data) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + struct strbuf line = STRBUF_INIT; + FILE *fh; + + cmd.git_cmd = 1; + argv_array_pushf(&cmd.args, "--git-dir=%s", path); + argv_array_push(&cmd.args, "for-each-ref"); + argv_array_push(&cmd.args, "--format=%(objectname) %(refname)"); + cmd.env = local_repo_env; + cmd.out = -1; + + if (start_command(&cmd)) + return; + + fh = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, fh) != EOF) { + struct object_id oid; + + if (get_oid_hex(line.buf, &oid) || + line.buf[GIT_SHA1_HEXSZ] != ' ') { + warning("invalid line while parsing alternate refs: %s", + line.buf); + break; + } + + cb(line.buf + GIT_SHA1_HEXSZ + 1, &oid, data); + } + + fclose(fh); + finish_command(&cmd); +} + struct alternate_refs_data { alternate_ref_fn *fn; void *data; @@ -1214,34 +1250,26 @@ struct alternate_refs_data { static int refs_from_alternate_cb(struct alternate_object_database *e, void *data) { - char *other; - size_t len; - struct remote *remote; - struct transport *transport; - const struct ref *extra; + struct strbuf path = STRBUF_INIT; + size_t base_len; struct alternate_refs_data *cb = data; - other = real_pathdup(e->path); - len = strlen(other); - - while (other[len-1] == '/') - other[--len] = '\0'; - if (len < 8 || memcmp(other + len - 8, "/objects", 8)) + if (!strbuf_realpath(&path, e->path, 0)) goto out; + if (!strbuf_strip_suffix(&path, "/objects")) + goto out; + base_len = path.len; + /* Is this a git repository with refs? 
*/ - memcpy(other + len - 8, "/refs", 6); - if (!is_directory(other)) + strbuf_addstr(&path, "/refs"); + if (!is_directory(path.buf)) goto out; - other[len - 8] = '\0'; - remote = remote_get(other); - transport = transport_get(remote, other); - for (extra = transport_get_remote_refs(transport); - extra; - extra = extra->next) - cb->fn(extra, cb->data); - transport_disconnect(transport); + strbuf_setlen(&path, base_len); + + read_alternate_refs(path.buf, cb->fn, cb->data); + out: - free(other); + strbuf_release(&path); return 0; } diff --git a/transport.h b/transport.h index e597b31b38..bc5571574b 100644 --- a/transport.h +++ b/transport.h @@ -255,6 +255,6 @@ int transport_refs_pushed(struct ref *ref); void transport_print_push_status(const char *dest, struct ref *refs, int verbose, int porcelain, unsigned int *reject_reasons); -typedef void alternate_ref_fn(const struct ref *, void *); +typedef void alternate_ref_fn(const char *refname, const struct object_id *oid, void *); extern void for_each_alternate_ref(alternate_ref_fn, void *); #endif |