summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2018-08-02 15:30:46 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2018-08-02 15:30:46 -0700
commit7c85ee6c5818f4409a29e40cc406ef5db9560af8 (patch)
tree3ae0b2267d235f9bf4f81dd1a7d781e980a953ce
parentMerge branch 'jm/send-email-tls-auth-on-batch' (diff)
parentnegotiator/skipping: skip commits during fetch (diff)
downloadtgif-7c85ee6c5818f4409a29e40cc406ef5db9560af8.tar.xz
Merge branch 'jt/fetch-negotiator-skipping'
Add a server-side knob to skip commits in exponential/fibbonacci stride in an attempt to cover wider swath of history with a smaller number of iterations, potentially accepting a larger packfile transfer, instead of going back one commit a time during common ancestor discovery during the "git fetch" transaction. * jt/fetch-negotiator-skipping: negotiator/skipping: skip commits during fetch
-rw-r--r--Documentation/config.txt9
-rw-r--r--Makefile1
-rw-r--r--fetch-negotiator.c8
-rw-r--r--fetch-negotiator.h3
-rw-r--r--fetch-pack.c7
-rw-r--r--negotiator/skipping.c250
-rw-r--r--negotiator/skipping.h8
-rwxr-xr-xt/t5552-skipping-fetch-negotiator.sh179
8 files changed, 461 insertions, 4 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 44df580a81..63365dcf3d 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -1531,6 +1531,15 @@ fetch.output::
`full` and `compact`. Default value is `full`. See section
OUTPUT in linkgit:git-fetch[1] for detail.
+fetch.negotiationAlgorithm::
+ Control how information about the commits in the local repository is
+ sent when negotiating the contents of the packfile to be sent by the
+ server. Set to "skipping" to use an algorithm that skips commits in an
+ effort to converge faster, but may result in a larger-than-necessary
+ packfile; any other value instructs Git to use the default algorithm
+ that never skips commits (unless the server has acknowledged it or one
+ of its descendants).
+
format.attach::
Enable multipart/mixed attachments as the default for
'format-patch'. The value can also be a double quoted string
diff --git a/Makefile b/Makefile
index ab5f603798..74588d3dda 100644
--- a/Makefile
+++ b/Makefile
@@ -893,6 +893,7 @@ LIB_OBJS += merge-recursive.o
LIB_OBJS += mergesort.o
LIB_OBJS += name-hash.o
LIB_OBJS += negotiator/default.o
+LIB_OBJS += negotiator/skipping.o
LIB_OBJS += notes.o
LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o
diff --git a/fetch-negotiator.c b/fetch-negotiator.c
index 2675d120fe..5d283049f4 100644
--- a/fetch-negotiator.c
+++ b/fetch-negotiator.c
@@ -1,8 +1,14 @@
#include "git-compat-util.h"
#include "fetch-negotiator.h"
#include "negotiator/default.h"
+#include "negotiator/skipping.h"
-void fetch_negotiator_init(struct fetch_negotiator *negotiator)
+void fetch_negotiator_init(struct fetch_negotiator *negotiator,
+ const char *algorithm)
{
+ if (algorithm && !strcmp(algorithm, "skipping")) {
+ skipping_negotiator_init(negotiator);
+ return;
+ }
default_negotiator_init(negotiator);
}
diff --git a/fetch-negotiator.h b/fetch-negotiator.h
index b1290aa9c6..ddb44a22dc 100644
--- a/fetch-negotiator.h
+++ b/fetch-negotiator.h
@@ -52,6 +52,7 @@ struct fetch_negotiator {
void *data;
};
-void fetch_negotiator_init(struct fetch_negotiator *negotiator);
+void fetch_negotiator_init(struct fetch_negotiator *negotiator,
+ const char *algorithm);
#endif
diff --git a/fetch-pack.c b/fetch-pack.c
index 0a9d184adb..5714bcbddd 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -35,6 +35,7 @@ static int agent_supported;
static int server_supports_filtering;
static struct lock_file shallow_lock;
static const char *alternate_shallow_file;
+static char *negotiation_algorithm;
/* Remember to update object flag allocation in object.h */
#define COMPLETE (1U << 0)
@@ -913,7 +914,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
const char *agent_feature;
int agent_len;
struct fetch_negotiator negotiator;
- fetch_negotiator_init(&negotiator);
+ fetch_negotiator_init(&negotiator, negotiation_algorithm);
sort_ref_list(&ref, ref_compare_name);
QSORT(sought, nr_sought, cmp_ref_by_name);
@@ -1324,7 +1325,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
int in_vain = 0;
int haves_to_send = INITIAL_FLUSH;
struct fetch_negotiator negotiator;
- fetch_negotiator_init(&negotiator);
+ fetch_negotiator_init(&negotiator, negotiation_algorithm);
packet_reader_init(&reader, fd[0], NULL, 0,
PACKET_READ_CHOMP_NEWLINE);
@@ -1406,6 +1407,8 @@ static void fetch_pack_config(void)
git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta);
git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects);
git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects);
+ git_config_get_string("fetch.negotiationalgorithm",
+ &negotiation_algorithm);
git_config(git_default_config, NULL);
}
diff --git a/negotiator/skipping.c b/negotiator/skipping.c
new file mode 100644
index 0000000000..dffbc76c49
--- /dev/null
+++ b/negotiator/skipping.c
@@ -0,0 +1,250 @@
+#include "cache.h"
+#include "skipping.h"
+#include "../commit.h"
+#include "../fetch-negotiator.h"
+#include "../prio-queue.h"
+#include "../refs.h"
+#include "../tag.h"
+
+/* Remember to update object flag allocation in object.h */
+/*
+ * Both us and the server know that both parties have this object.
+ */
+#define COMMON (1U << 2)
+/*
+ * The server has told us that it has this object. We still need to tell the
+ * server that we have this object (or one of its descendants), but since we are
+ * going to do that, we do not need to tell the server about its ancestors.
+ */
+#define ADVERTISED (1U << 3)
+/*
+ * This commit has entered the priority queue.
+ */
+#define SEEN (1U << 4)
+/*
+ * This commit has left the priority queue.
+ */
+#define POPPED (1U << 5)
+
+static int marked;
+
+/*
+ * An entry in the priority queue.
+ */
+struct entry {
+ struct commit *commit;
+
+ /*
+ * Used only if commit is not COMMON.
+ */
+ uint16_t original_ttl;
+ uint16_t ttl;
+};
+
+struct data {
+ struct prio_queue rev_list;
+
+ /*
+ * The number of non-COMMON commits in rev_list.
+ */
+ int non_common_revs;
+};
+
+static int compare(const void *a_, const void *b_, void *unused)
+{
+ const struct entry *a = a_;
+ const struct entry *b = b_;
+ return compare_commits_by_commit_date(a->commit, b->commit, NULL);
+}
+
+static struct entry *rev_list_push(struct data *data, struct commit *commit, int mark)
+{
+ struct entry *entry;
+ commit->object.flags |= mark | SEEN;
+
+ entry = xcalloc(1, sizeof(*entry));
+ entry->commit = commit;
+ prio_queue_put(&data->rev_list, entry);
+
+ if (!(mark & COMMON))
+ data->non_common_revs++;
+ return entry;
+}
+
+static int clear_marks(const char *refname, const struct object_id *oid,
+ int flag, void *cb_data)
+{
+ struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0);
+
+ if (o && o->type == OBJ_COMMIT)
+ clear_commit_marks((struct commit *)o,
+ COMMON | ADVERTISED | SEEN | POPPED);
+ return 0;
+}
+
+/*
+ * Mark this SEEN commit and all its SEEN ancestors as COMMON.
+ */
+static void mark_common(struct data *data, struct commit *c)
+{
+ struct commit_list *p;
+
+ if (c->object.flags & COMMON)
+ return;
+ c->object.flags |= COMMON;
+ if (!(c->object.flags & POPPED))
+ data->non_common_revs--;
+
+ if (!c->object.parsed)
+ return;
+ for (p = c->parents; p; p = p->next) {
+ if (p->item->object.flags & SEEN)
+ mark_common(data, p->item);
+ }
+}
+
+/*
+ * Ensure that the priority queue has an entry for to_push, and ensure that the
+ * entry has the correct flags and ttl.
+ *
+ * This function returns 1 if an entry was found or created, and 0 otherwise
+ * (because the entry for this commit had already been popped).
+ */
+static int push_parent(struct data *data, struct entry *entry,
+ struct commit *to_push)
+{
+ struct entry *parent_entry;
+
+ if (to_push->object.flags & SEEN) {
+ int i;
+ if (to_push->object.flags & POPPED)
+ /*
+ * The entry for this commit has already been popped,
+ * due to clock skew. Pretend that this parent does not
+ * exist.
+ */
+ return 0;
+ /*
+ * Find the existing entry and use it.
+ */
+ for (i = 0; i < data->rev_list.nr; i++) {
+ parent_entry = data->rev_list.array[i].data;
+ if (parent_entry->commit == to_push)
+ goto parent_found;
+ }
+ BUG("missing parent in priority queue");
+parent_found:
+ ;
+ } else {
+ parent_entry = rev_list_push(data, to_push, 0);
+ }
+
+ if (entry->commit->object.flags & (COMMON | ADVERTISED)) {
+ mark_common(data, to_push);
+ } else {
+ uint16_t new_original_ttl = entry->ttl
+ ? entry->original_ttl : entry->original_ttl * 3 / 2 + 1;
+ uint16_t new_ttl = entry->ttl
+ ? entry->ttl - 1 : new_original_ttl;
+ if (parent_entry->original_ttl < new_original_ttl) {
+ parent_entry->original_ttl = new_original_ttl;
+ parent_entry->ttl = new_ttl;
+ }
+ }
+
+ return 1;
+}
+
+static const struct object_id *get_rev(struct data *data)
+{
+ struct commit *to_send = NULL;
+
+ while (to_send == NULL) {
+ struct entry *entry;
+ struct commit *commit;
+ struct commit_list *p;
+ int parent_pushed = 0;
+
+ if (data->rev_list.nr == 0 || data->non_common_revs == 0)
+ return NULL;
+
+ entry = prio_queue_get(&data->rev_list);
+ commit = entry->commit;
+ commit->object.flags |= POPPED;
+ if (!(commit->object.flags & COMMON))
+ data->non_common_revs--;
+
+ if (!(commit->object.flags & COMMON) && !entry->ttl)
+ to_send = commit;
+
+ parse_commit(commit);
+ for (p = commit->parents; p; p = p->next)
+ parent_pushed |= push_parent(data, entry, p->item);
+
+ if (!(commit->object.flags & COMMON) && !parent_pushed)
+ /*
+ * This commit has no parents, or all of its parents
+ * have already been popped (due to clock skew), so send
+ * it anyway.
+ */
+ to_send = commit;
+
+ free(entry);
+ }
+
+ return &to_send->object.oid;
+}
+
+static void known_common(struct fetch_negotiator *n, struct commit *c)
+{
+ if (c->object.flags & SEEN)
+ return;
+ rev_list_push(n->data, c, ADVERTISED);
+}
+
+static void add_tip(struct fetch_negotiator *n, struct commit *c)
+{
+ n->known_common = NULL;
+ if (c->object.flags & SEEN)
+ return;
+ rev_list_push(n->data, c, 0);
+}
+
+static const struct object_id *next(struct fetch_negotiator *n)
+{
+ n->known_common = NULL;
+ n->add_tip = NULL;
+ return get_rev(n->data);
+}
+
+static int ack(struct fetch_negotiator *n, struct commit *c)
+{
+ int known_to_be_common = !!(c->object.flags & COMMON);
+ if (!(c->object.flags & SEEN))
+ die("received ack for commit %s not sent as 'have'\n",
+ oid_to_hex(&c->object.oid));
+ mark_common(n->data, c);
+ return known_to_be_common;
+}
+
+static void release(struct fetch_negotiator *n)
+{
+ clear_prio_queue(&((struct data *)n->data)->rev_list);
+ FREE_AND_NULL(n->data);
+}
+
+void skipping_negotiator_init(struct fetch_negotiator *negotiator)
+{
+ struct data *data;
+ negotiator->known_common = known_common;
+ negotiator->add_tip = add_tip;
+ negotiator->next = next;
+ negotiator->ack = ack;
+ negotiator->release = release;
+ negotiator->data = data = xcalloc(1, sizeof(*data));
+ data->rev_list.compare = compare;
+
+ if (marked)
+ for_each_ref(clear_marks, NULL);
+ marked = 1;
+}
diff --git a/negotiator/skipping.h b/negotiator/skipping.h
new file mode 100644
index 0000000000..d7dfa6c6e4
--- /dev/null
+++ b/negotiator/skipping.h
@@ -0,0 +1,8 @@
+#ifndef NEGOTIATOR_SKIPPING_H
+#define NEGOTIATOR_SKIPPING_H
+
+struct fetch_negotiator;
+
+void skipping_negotiator_init(struct fetch_negotiator *negotiator);
+
+#endif
diff --git a/t/t5552-skipping-fetch-negotiator.sh b/t/t5552-skipping-fetch-negotiator.sh
new file mode 100755
index 0000000000..0a8e0e42ed
--- /dev/null
+++ b/t/t5552-skipping-fetch-negotiator.sh
@@ -0,0 +1,179 @@
+#!/bin/sh
+
+test_description='test skipping fetch negotiator'
+. ./test-lib.sh
+
+have_sent () {
+ while test "$#" -ne 0
+ do
+ grep "fetch> have $(git -C client rev-parse $1)" trace
+ if test $? -ne 0
+ then
+ echo "No have $(git -C client rev-parse $1) ($1)"
+ return 1
+ fi
+ shift
+ done
+}
+
+have_not_sent () {
+ while test "$#" -ne 0
+ do
+ grep "fetch> have $(git -C client rev-parse $1)" trace
+ if test $? -eq 0
+ then
+ return 1
+ fi
+ shift
+ done
+}
+
+test_expect_success 'commits with no parents are sent regardless of skip distance' '
+ git init server &&
+ test_commit -C server to_fetch &&
+
+ git init client &&
+ for i in $(seq 7)
+ do
+ test_commit -C client c$i
+ done &&
+
+ # We send: "c7" (skip 1) "c5" (skip 2) "c2" (skip 4). After that, since
+ # "c1" has no parent, it is still sent as "have" even though it would
+ # normally be skipped.
+ test_config -C client fetch.negotiationalgorithm skipping &&
+ GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
+ have_sent c7 c5 c2 c1 &&
+ have_not_sent c6 c4 c3
+'
+
+test_expect_success 'when two skips collide, favor the larger one' '
+ rm -rf server client trace &&
+ git init server &&
+ test_commit -C server to_fetch &&
+
+ git init client &&
+ for i in $(seq 11)
+ do
+ test_commit -C client c$i
+ done &&
+ git -C client checkout c5 &&
+ test_commit -C client c5side &&
+
+ # Before reaching c5, we send "c5side" (skip 1) and "c11" (skip 1) "c9"
+ # (skip 2) "c6" (skip 4). The larger skip (skip 4) takes precedence, so
+ # the next "have" sent will be "c1" (from "c6" skip 4) and not "c4"
+ # (from "c5side" skip 1).
+ test_config -C client fetch.negotiationalgorithm skipping &&
+ GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
+ have_sent c5side c11 c9 c6 c1 &&
+ have_not_sent c10 c8 c7 c5 c4 c3 c2
+'
+
+test_expect_success 'use ref advertisement to filter out commits' '
+ rm -rf server client trace &&
+ git init server &&
+ test_commit -C server c1 &&
+ test_commit -C server c2 &&
+ test_commit -C server c3 &&
+ git -C server tag -d c1 c2 c3 &&
+
+ git clone server client &&
+ test_commit -C client c4 &&
+ test_commit -C client c5 &&
+ git -C client checkout c4^^ &&
+ test_commit -C client c2side &&
+
+ git -C server checkout --orphan anotherbranch &&
+ test_commit -C server to_fetch &&
+
+ # The server advertising "c3" (as "refs/heads/master") means that we do
+ # not need to send any ancestors of "c3", but we still need to send "c3"
+ # itself.
+ test_config -C client fetch.negotiationalgorithm skipping &&
+ GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch origin to_fetch &&
+ have_sent c5 c4^ c2side &&
+ have_not_sent c4 c4^^ c4^^^
+'
+
+test_expect_success 'handle clock skew' '
+ rm -rf server client trace &&
+ git init server &&
+ test_commit -C server to_fetch &&
+
+ git init client &&
+
+ # 2 regular commits
+ test_tick=2000000000 &&
+ test_commit -C client c1 &&
+ test_commit -C client c2 &&
+
+ # 4 old commits
+ test_tick=1000000000 &&
+ git -C client checkout c1 &&
+ test_commit -C client old1 &&
+ test_commit -C client old2 &&
+ test_commit -C client old3 &&
+ test_commit -C client old4 &&
+
+ # "c2" and "c1" are popped first, then "old4" to "old1". "old1" would
+ # normally be skipped, but is treated as a commit without a parent here
+ # and sent, because (due to clock skew) its only parent has already been
+ # popped off the priority queue.
+ test_config -C client fetch.negotiationalgorithm skipping &&
+ GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
+ have_sent c2 c1 old4 old2 old1 &&
+ have_not_sent old3
+'
+
+test_expect_success 'do not send "have" with ancestors of commits that server ACKed' '
+ rm -rf server client trace &&
+ git init server &&
+ test_commit -C server to_fetch &&
+
+ git init client &&
+ for i in $(seq 8)
+ do
+ git -C client checkout --orphan b$i &&
+ test_commit -C client b$i.c0
+ done &&
+ for j in $(seq 19)
+ do
+ for i in $(seq 8)
+ do
+ git -C client checkout b$i &&
+ test_commit -C client b$i.c$j
+ done
+ done &&
+
+ # Copy this branch over to the server and add a commit on it so that it
+ # is reachable but not advertised.
+ git -C server fetch --no-tags "$(pwd)/client" b1:refs/heads/b1 &&
+ git -C server checkout b1 &&
+ test_commit -C server commit-on-b1 &&
+
+ test_config -C client fetch.negotiationalgorithm skipping &&
+ GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" to_fetch &&
+ grep " fetch" trace &&
+
+ # fetch-pack sends 2 requests each containing 16 "have" lines before
+ # processing the first response. In these 2 requests, 4 commits from
+ # each branch are sent. Just check the first branch.
+ have_sent b1.c19 b1.c17 b1.c14 b1.c9 &&
+ have_not_sent b1.c18 b1.c16 b1.c15 b1.c13 b1.c12 b1.c11 b1.c10 &&
+
+ # While fetch-pack is processing the first response, it should read that
+ # the server ACKs b1.c19 and b1.c17.
+ grep "fetch< ACK $(git -C client rev-parse b1.c19) common" trace &&
+ grep "fetch< ACK $(git -C client rev-parse b1.c17) common" trace &&
+
+ # fetch-pack should thus not send any more commits in the b1 branch, but
+ # should still send the others (in this test, just check b2).
+ for i in $(seq 0 8)
+ do
+ have_not_sent b1.c$i
+ done &&
+ have_sent b2.c1 b2.c0
+'
+
+test_done