diff options
author | Junio C Hamano <gitster@pobox.com> | 2018-08-02 15:30:46 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2018-08-02 15:30:46 -0700 |
commit | 7c85ee6c5818f4409a29e40cc406ef5db9560af8 (patch) | |
tree | 3ae0b2267d235f9bf4f81dd1a7d781e980a953ce | |
parent | Merge branch 'jm/send-email-tls-auth-on-batch' (diff) | |
parent | negotiator/skipping: skip commits during fetch (diff) | |
download | tgif-7c85ee6c5818f4409a29e40cc406ef5db9560af8.tar.xz |
Merge branch 'jt/fetch-negotiator-skipping'
Add a server-side knob to skip commits in exponential/fibbonacci
stride in an attempt to cover wider swath of history with a smaller
number of iterations, potentially accepting a larger packfile
transfer, instead of going back one commit a time during common
ancestor discovery during the "git fetch" transaction.
* jt/fetch-negotiator-skipping:
negotiator/skipping: skip commits during fetch
-rw-r--r-- | Documentation/config.txt | 9 | ||||
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | fetch-negotiator.c | 8 | ||||
-rw-r--r-- | fetch-negotiator.h | 3 | ||||
-rw-r--r-- | fetch-pack.c | 7 | ||||
-rw-r--r-- | negotiator/skipping.c | 250 | ||||
-rw-r--r-- | negotiator/skipping.h | 8 | ||||
-rwxr-xr-x | t/t5552-skipping-fetch-negotiator.sh | 179 |
8 files changed, 461 insertions, 4 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index 44df580a81..63365dcf3d 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1531,6 +1531,15 @@ fetch.output:: `full` and `compact`. Default value is `full`. See section OUTPUT in linkgit:git-fetch[1] for detail. +fetch.negotiationAlgorithm:: + Control how information about the commits in the local repository is + sent when negotiating the contents of the packfile to be sent by the + server. Set to "skipping" to use an algorithm that skips commits in an + effort to converge faster, but may result in a larger-than-necessary + packfile; any other value instructs Git to use the default algorithm + that never skips commits (unless the server has acknowledged it or one + of its descendants). + format.attach:: Enable multipart/mixed attachments as the default for 'format-patch'. The value can also be a double quoted string @@ -893,6 +893,7 @@ LIB_OBJS += merge-recursive.o LIB_OBJS += mergesort.o LIB_OBJS += name-hash.o LIB_OBJS += negotiator/default.o +LIB_OBJS += negotiator/skipping.o LIB_OBJS += notes.o LIB_OBJS += notes-cache.o LIB_OBJS += notes-merge.o diff --git a/fetch-negotiator.c b/fetch-negotiator.c index 2675d120fe..5d283049f4 100644 --- a/fetch-negotiator.c +++ b/fetch-negotiator.c @@ -1,8 +1,14 @@ #include "git-compat-util.h" #include "fetch-negotiator.h" #include "negotiator/default.h" +#include "negotiator/skipping.h" -void fetch_negotiator_init(struct fetch_negotiator *negotiator) +void fetch_negotiator_init(struct fetch_negotiator *negotiator, + const char *algorithm) { + if (algorithm && !strcmp(algorithm, "skipping")) { + skipping_negotiator_init(negotiator); + return; + } default_negotiator_init(negotiator); } diff --git a/fetch-negotiator.h b/fetch-negotiator.h index b1290aa9c6..ddb44a22dc 100644 --- a/fetch-negotiator.h +++ b/fetch-negotiator.h @@ -52,6 +52,7 @@ struct fetch_negotiator { void *data; }; -void fetch_negotiator_init(struct fetch_negotiator *negotiator); +void fetch_negotiator_init(struct fetch_negotiator *negotiator, + const char *algorithm); #endif diff --git a/fetch-pack.c b/fetch-pack.c index 0a9d184adb..5714bcbddd 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -35,6 +35,7 @@ static int agent_supported; static int server_supports_filtering; static struct lock_file shallow_lock; static const char *alternate_shallow_file; +static char *negotiation_algorithm; /* Remember to update object flag allocation in object.h */ #define COMPLETE (1U << 0) @@ -913,7 +914,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, const char *agent_feature; int agent_len; struct fetch_negotiator negotiator; - fetch_negotiator_init(&negotiator); + fetch_negotiator_init(&negotiator, negotiation_algorithm); sort_ref_list(&ref, ref_compare_name); QSORT(sought, nr_sought, cmp_ref_by_name); @@ -1324,7 +1325,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, int in_vain = 0; int haves_to_send = INITIAL_FLUSH; struct fetch_negotiator negotiator; - fetch_negotiator_init(&negotiator); + fetch_negotiator_init(&negotiator, negotiation_algorithm); packet_reader_init(&reader, fd[0], NULL, 0, PACKET_READ_CHOMP_NEWLINE); @@ -1406,6 +1407,8 @@ static void fetch_pack_config(void) git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta); git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects); git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects); + git_config_get_string("fetch.negotiationalgorithm", + &negotiation_algorithm); git_config(git_default_config, NULL); } diff --git a/negotiator/skipping.c b/negotiator/skipping.c new file mode 100644 index 0000000000..dffbc76c49 --- /dev/null +++ b/negotiator/skipping.c @@ -0,0 +1,250 @@ +#include "cache.h" +#include "skipping.h" +#include "../commit.h" +#include "../fetch-negotiator.h" +#include "../prio-queue.h" +#include "../refs.h" +#include "../tag.h" + +/* Remember to update object flag allocation in object.h */ +/* + * Both us and the server know that both parties have this object. + */ +#define COMMON (1U << 2) +/* + * The server has told us that it has this object. We still need to tell the + * server that we have this object (or one of its descendants), but since we are + * going to do that, we do not need to tell the server about its ancestors. + */ +#define ADVERTISED (1U << 3) +/* + * This commit has entered the priority queue. + */ +#define SEEN (1U << 4) +/* + * This commit has left the priority queue. + */ +#define POPPED (1U << 5) + +static int marked; + +/* + * An entry in the priority queue. + */ +struct entry { + struct commit *commit; + + /* + * Used only if commit is not COMMON. + */ + uint16_t original_ttl; + uint16_t ttl; +}; + +struct data { + struct prio_queue rev_list; + + /* + * The number of non-COMMON commits in rev_list. + */ + int non_common_revs; +}; + +static int compare(const void *a_, const void *b_, void *unused) +{ + const struct entry *a = a_; + const struct entry *b = b_; + return compare_commits_by_commit_date(a->commit, b->commit, NULL); +} + +static struct entry *rev_list_push(struct data *data, struct commit *commit, int mark) +{ + struct entry *entry; + commit->object.flags |= mark | SEEN; + + entry = xcalloc(1, sizeof(*entry)); + entry->commit = commit; + prio_queue_put(&data->rev_list, entry); + + if (!(mark & COMMON)) + data->non_common_revs++; + return entry; +} + +static int clear_marks(const char *refname, const struct object_id *oid, + int flag, void *cb_data) +{ + struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0); + + if (o && o->type == OBJ_COMMIT) + clear_commit_marks((struct commit *)o, + COMMON | ADVERTISED | SEEN | POPPED); + return 0; +} + +/* + * Mark this SEEN commit and all its SEEN ancestors as COMMON. + */ +static void mark_common(struct data *data, struct commit *c) +{ + struct commit_list *p; + + if (c->object.flags & COMMON) + return; + c->object.flags |= COMMON; + if (!(c->object.flags & POPPED)) + data->non_common_revs--; + + if (!c->object.parsed) + return; + for (p = c->parents; p; p = p->next) { + if (p->item->object.flags & SEEN) + mark_common(data, p->item); + } +} + +/* + * Ensure that the priority queue has an entry for to_push, and ensure that the + * entry has the correct flags and ttl. + * + * This function returns 1 if an entry was found or created, and 0 otherwise + * (because the entry for this commit had already been popped). + */ +static int push_parent(struct data *data, struct entry *entry, + struct commit *to_push) +{ + struct entry *parent_entry; + + if (to_push->object.flags & SEEN) { + int i; + if (to_push->object.flags & POPPED) + /* + * The entry for this commit has already been popped, + * due to clock skew. Pretend that this parent does not + * exist. + */ + return 0; + /* + * Find the existing entry and use it. + */ + for (i = 0; i < data->rev_list.nr; i++) { + parent_entry = data->rev_list.array[i].data; + if (parent_entry->commit == to_push) + goto parent_found; + } + BUG("missing parent in priority queue"); +parent_found: + ; + } else { + parent_entry = rev_list_push(data, to_push, 0); + } + + if (entry->commit->object.flags & (COMMON | ADVERTISED)) { + mark_common(data, to_push); + } else { + uint16_t new_original_ttl = entry->ttl + ? entry->original_ttl : entry->original_ttl * 3 / 2 + 1; + uint16_t new_ttl = entry->ttl + ? entry->ttl - 1 : new_original_ttl; + if (parent_entry->original_ttl < new_original_ttl) { + parent_entry->original_ttl = new_original_ttl; + parent_entry->ttl = new_ttl; + } + } + + return 1; +} + +static const struct object_id *get_rev(struct data *data) +{ + struct commit *to_send = NULL; + + while (to_send == NULL) { + struct entry *entry; + struct commit *commit; + struct commit_list *p; + int parent_pushed = 0; + + if (data->rev_list.nr == 0 || data->non_common_revs == 0) + return NULL; + + entry = prio_queue_get(&data->rev_list); + commit = entry->commit; + commit->object.flags |= POPPED; + if (!(commit->object.flags & COMMON)) + data->non_common_revs--; + + if (!(commit->object.flags & COMMON) && !entry->ttl) + to_send = commit; + + parse_commit(commit); + for (p = commit->parents; p; p = p->next) + parent_pushed |= push_parent(data, entry, p->item); + + if (!(commit->object.flags & COMMON) && !parent_pushed) + /* + * This commit has no parents, or all of its parents + * have already been popped (due to clock skew), so send + * it anyway. + */ + to_send = commit; + + free(entry); + } + + return &to_send->object.oid; +} + +static void known_common(struct fetch_negotiator *n, struct commit *c) +{ + if (c->object.flags & SEEN) + return; + rev_list_push(n->data, c, ADVERTISED); +} + +static void add_tip(struct fetch_negotiator *n, struct commit *c) +{ + n->known_common = NULL; + if (c->object.flags & SEEN) + return; + rev_list_push(n->data, c, 0); +} + +static const struct object_id *next(struct fetch_negotiator *n) +{ + n->known_common = NULL; + n->add_tip = NULL; + return get_rev(n->data); +} + +static int ack(struct fetch_negotiator *n, struct commit *c) +{ + int known_to_be_common = !!(c->object.flags & COMMON); + if (!(c->object.flags & SEEN)) + die("received ack for commit %s not sent as 'have'\n", + oid_to_hex(&c->object.oid)); + mark_common(n->data, c); + return known_to_be_common; +} + +static void release(struct fetch_negotiator *n) +{ + clear_prio_queue(&((struct data *)n->data)->rev_list); + FREE_AND_NULL(n->data); +} + +void skipping_negotiator_init(struct fetch_negotiator *negotiator) +{ + struct data *data; + negotiator->known_common = known_common; + negotiator->add_tip = add_tip; + negotiator->next = next; + negotiator->ack = ack; + negotiator->release = release; + negotiator->data = data = xcalloc(1, sizeof(*data)); + data->rev_list.compare = compare; + + if (marked) + for_each_ref(clear_marks, NULL); + marked = 1; +} diff --git a/negotiator/skipping.h b/negotiator/skipping.h new file mode 100644 index 0000000000..d7dfa6c6e4 --- /dev/null +++ b/negotiator/skipping.h @@ -0,0 +1,8 @@ +#ifndef NEGOTIATOR_SKIPPING_H +#define NEGOTIATOR_SKIPPING_H + +struct fetch_negotiator; + +void skipping_negotiator_init(struct fetch_negotiator *negotiator); + +#endif diff --git a/t/t5552-skipping-fetch-negotiator.sh b/t/t5552-skipping-fetch-negotiator.sh new file mode 100755 index 0000000000..0a8e0e42ed --- /dev/null +++ b/t/t5552-skipping-fetch-negotiator.sh @@ -0,0 +1,179 @@ +#!/bin/sh + +test_description='test skipping fetch negotiator' +. ./test-lib.sh + +have_sent () { + while test "$#" -ne 0 + do + grep "fetch> have $(git -C client rev-parse $1)" trace + if test $? -ne 0 + then + echo "No have $(git -C client rev-parse $1) ($1)" + return 1 + fi + shift + done +} + +have_not_sent () { + while test "$#" -ne 0 + do + grep "fetch> have $(git -C client rev-parse $1)" trace + if test $? -eq 0 + then + return 1 + fi + shift + done +} + +test_expect_success 'commits with no parents are sent regardless of skip distance' ' + git init server && + test_commit -C server to_fetch && + + git init client && + for i in $(seq 7) + do + test_commit -C client c$i + done && + + # We send: "c7" (skip 1) "c5" (skip 2) "c2" (skip 4). After that, since + # "c1" has no parent, it is still sent as "have" even though it would + # normally be skipped. + test_config -C client fetch.negotiationalgorithm skipping && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" && + have_sent c7 c5 c2 c1 && + have_not_sent c6 c4 c3 +' + +test_expect_success 'when two skips collide, favor the larger one' ' + rm -rf server client trace && + git init server && + test_commit -C server to_fetch && + + git init client && + for i in $(seq 11) + do + test_commit -C client c$i + done && + git -C client checkout c5 && + test_commit -C client c5side && + + # Before reaching c5, we send "c5side" (skip 1) and "c11" (skip 1) "c9" + # (skip 2) "c6" (skip 4). The larger skip (skip 4) takes precedence, so + # the next "have" sent will be "c1" (from "c6" skip 4) and not "c4" + # (from "c5side" skip 1). + test_config -C client fetch.negotiationalgorithm skipping && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" && + have_sent c5side c11 c9 c6 c1 && + have_not_sent c10 c8 c7 c5 c4 c3 c2 +' + +test_expect_success 'use ref advertisement to filter out commits' ' + rm -rf server client trace && + git init server && + test_commit -C server c1 && + test_commit -C server c2 && + test_commit -C server c3 && + git -C server tag -d c1 c2 c3 && + + git clone server client && + test_commit -C client c4 && + test_commit -C client c5 && + git -C client checkout c4^^ && + test_commit -C client c2side && + + git -C server checkout --orphan anotherbranch && + test_commit -C server to_fetch && + + # The server advertising "c3" (as "refs/heads/master") means that we do + # not need to send any ancestors of "c3", but we still need to send "c3" + # itself. + test_config -C client fetch.negotiationalgorithm skipping && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch origin to_fetch && + have_sent c5 c4^ c2side && + have_not_sent c4 c4^^ c4^^^ +' + +test_expect_success 'handle clock skew' ' + rm -rf server client trace && + git init server && + test_commit -C server to_fetch && + + git init client && + + # 2 regular commits + test_tick=2000000000 && + test_commit -C client c1 && + test_commit -C client c2 && + + # 4 old commits + test_tick=1000000000 && + git -C client checkout c1 && + test_commit -C client old1 && + test_commit -C client old2 && + test_commit -C client old3 && + test_commit -C client old4 && + + # "c2" and "c1" are popped first, then "old4" to "old1". "old1" would + # normally be skipped, but is treated as a commit without a parent here + # and sent, because (due to clock skew) its only parent has already been + # popped off the priority queue. + test_config -C client fetch.negotiationalgorithm skipping && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" && + have_sent c2 c1 old4 old2 old1 && + have_not_sent old3 +' + +test_expect_success 'do not send "have" with ancestors of commits that server ACKed' ' + rm -rf server client trace && + git init server && + test_commit -C server to_fetch && + + git init client && + for i in $(seq 8) + do + git -C client checkout --orphan b$i && + test_commit -C client b$i.c0 + done && + for j in $(seq 19) + do + for i in $(seq 8) + do + git -C client checkout b$i && + test_commit -C client b$i.c$j + done + done && + + # Copy this branch over to the server and add a commit on it so that it + # is reachable but not advertised. + git -C server fetch --no-tags "$(pwd)/client" b1:refs/heads/b1 && + git -C server checkout b1 && + test_commit -C server commit-on-b1 && + + test_config -C client fetch.negotiationalgorithm skipping && + GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" to_fetch && + grep " fetch" trace && + + # fetch-pack sends 2 requests each containing 16 "have" lines before + # processing the first response. In these 2 requests, 4 commits from + # each branch are sent. Just check the first branch. + have_sent b1.c19 b1.c17 b1.c14 b1.c9 && + have_not_sent b1.c18 b1.c16 b1.c15 b1.c13 b1.c12 b1.c11 b1.c10 && + + # While fetch-pack is processing the first response, it should read that + # the server ACKs b1.c19 and b1.c17. + grep "fetch< ACK $(git -C client rev-parse b1.c19) common" trace && + grep "fetch< ACK $(git -C client rev-parse b1.c17) common" trace && + + # fetch-pack should thus not send any more commits in the b1 branch, but + # should still send the others (in this test, just check b2). + for i in $(seq 0 8) + do + have_not_sent b1.c$i + done && + have_sent b2.c1 b2.c0 +' + +test_done |