diff options
author | Junio C Hamano <gitster@pobox.com> | 2018-05-02 18:54:10 +0900 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2018-05-02 18:54:10 +0900 |
commit | ea44c0a5940c183023797eb54cd7912709f421b0 (patch) | |
tree | de7d4b20b93c6d80a347a0841de56ff6bedf502e | |
parent | The fourth batch for 2.18 (diff) | |
parent | remote-curl: don't request v2 when pushing (diff) | |
download | tgif-ea44c0a5940c183023797eb54cd7912709f421b0.tar.xz |
Merge branch 'bw/protocol-v2' into jt/partial-clone-proto-v2
The beginning of the next-gen transfer protocol.
* bw/protocol-v2: (35 commits)
remote-curl: don't request v2 when pushing
remote-curl: implement stateless-connect command
http: eliminate "# service" line when using protocol v2
http: don't always add Git-Protocol header
http: allow providing extra headers for http requests
remote-curl: store the protocol version the server responded with
remote-curl: create copy of the service name
pkt-line: add packet_buf_write_len function
transport-helper: introduce stateless-connect
transport-helper: refactor process_connect_service
transport-helper: remove name parameter
connect: don't request v2 when pushing
connect: refactor git_connect to only get the protocol version once
fetch-pack: support shallow requests
fetch-pack: perform a fetch using v2
upload-pack: introduce fetch server command
push: pass ref prefixes when pushing
fetch: pass ref prefixes when fetching
ls-remote: pass ref prefixes when requesting a remote's refs
transport: convert transport_get_remote_refs to take a list of ref prefixes
...
44 files changed, 3318 insertions, 368 deletions
diff --git a/.gitignore b/.gitignore index 833ef3b0b7..2d0450c262 100644 --- a/.gitignore +++ b/.gitignore @@ -140,6 +140,7 @@ /git-rm /git-send-email /git-send-pack +/git-serve /git-sh-i18n /git-sh-i18n--envsubst /git-sh-setup diff --git a/Documentation/Makefile b/Documentation/Makefile index 6232143cb9..fa9e5c0acd 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -78,6 +78,7 @@ TECH_DOCS += technical/pack-heuristics TECH_DOCS += technical/pack-protocol TECH_DOCS += technical/protocol-capabilities TECH_DOCS += technical/protocol-common +TECH_DOCS += technical/protocol-v2 TECH_DOCS += technical/racy-git TECH_DOCS += technical/send-pack-pipeline TECH_DOCS += technical/shallow diff --git a/Documentation/gitremote-helpers.txt b/Documentation/gitremote-helpers.txt index 4b8c93ec59..9d1459aac6 100644 --- a/Documentation/gitremote-helpers.txt +++ b/Documentation/gitremote-helpers.txt @@ -102,6 +102,14 @@ Capabilities for Pushing + Supported commands: 'connect'. +'stateless-connect':: + Experimental; for internal use only. + Can attempt to connect to a remote server for communication + using git's wire-protocol version 2. See the documentation + for the stateless-connect command for more information. ++ +Supported commands: 'stateless-connect'. + 'push':: Can discover remote refs and push local commits and the history leading up to them to new or existing remote refs. @@ -136,6 +144,14 @@ Capabilities for Fetching + Supported commands: 'connect'. +'stateless-connect':: + Experimental; for internal use only. + Can attempt to connect to a remote server for communication + using git's wire-protocol version 2. See the documentation + for the stateless-connect command for more information. ++ +Supported commands: 'stateless-connect'. + 'fetch':: Can discover remote refs and transfer objects reachable from them to the local object store. @@ -375,6 +391,22 @@ Supported if the helper has the "export" capability. + Supported if the helper has the "connect" capability. +'stateless-connect' <service>:: + Experimental; for internal use only. + Connects to the given remote service for communication using + git's wire-protocol version 2. Valid replies to this command + are empty line (connection established), 'fallback' (no smart + transport support, fall back to dumb transports) and just + exiting with error message printed (can't connect, don't bother + trying to fall back). After line feed terminating the positive + (empty) response, the output of the service starts. Messages + (both request and response) must consist of zero or more + PKT-LINEs, terminating in a flush packet. The client must not + expect the server to store any state in between request-response + pairs. After the connection ends, the remote helper exits. ++ +Supported if the helper has the "stateless-connect" capability. + If a fatal error occurs, the program writes the error message to stderr and exits. The caller should expect that a suitable error message has been printed if the child closes the connection without diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt new file mode 100644 index 0000000000..136179d7d8 --- /dev/null +++ b/Documentation/technical/protocol-v2.txt @@ -0,0 +1,395 @@ + Git Wire Protocol, Version 2 +============================== + +This document presents a specification for a version 2 of Git's wire +protocol. Protocol v2 will improve upon v1 in the following ways: + + * Instead of multiple service names, multiple commands will be + supported by a single service + * Easily extendable as capabilities are moved into their own section + of the protocol, no longer being hidden behind a NUL byte and + limited by the size of a pkt-line + * Separate out other information hidden behind NUL bytes (e.g. agent + string as a capability and symrefs can be requested using 'ls-refs') + * Reference advertisement will be omitted unless explicitly requested + * ls-refs command to explicitly request some refs + * Designed with http and stateless-rpc in mind. With clear flush + semantics the http remote helper can simply act as a proxy + +In protocol v2 communication is command oriented. When first contacting a +server a list of capabilities will advertised. Some of these capabilities +will be commands which a client can request be executed. Once a command +has completed, a client can reuse the connection and request that other +commands be executed. + + Packet-Line Framing +--------------------- + +All communication is done using packet-line framing, just as in v1. See +`Documentation/technical/pack-protocol.txt` and +`Documentation/technical/protocol-common.txt` for more information. + +In protocol v2 these special packets will have the following semantics: + + * '0000' Flush Packet (flush-pkt) - indicates the end of a message + * '0001' Delimiter Packet (delim-pkt) - separates sections of a message + + Initial Client Request +------------------------ + +In general a client can request to speak protocol v2 by sending +`version=2` through the respective side-channel for the transport being +used which inevitably sets `GIT_PROTOCOL`. More information can be +found in `pack-protocol.txt` and `http-protocol.txt`. In all cases the +response from the server is the capability advertisement. + + Git Transport +~~~~~~~~~~~~~~~ + +When using the git:// transport, you can request to use protocol v2 by +sending "version=2" as an extra parameter: + + 003egit-upload-pack /project.git\0host=myserver.com\0\0version=2\0 + + SSH and File Transport +~~~~~~~~~~~~~~~~~~~~~~~~ + +When using either the ssh:// or file:// transport, the GIT_PROTOCOL +environment variable must be set explicitly to include "version=2". + + HTTP Transport +~~~~~~~~~~~~~~~~ + +When using the http:// or https:// transport a client makes a "smart" +info/refs request as described in `http-protocol.txt` and requests that +v2 be used by supplying "version=2" in the `Git-Protocol` header. + + C: Git-Protocol: version=2 + C: + C: GET $GIT_URL/info/refs?service=git-upload-pack HTTP/1.0 + +A v2 server would reply: + + S: 200 OK + S: <Some headers> + S: ... + S: + S: 000eversion 2\n + S: <capability-advertisement> + +Subsequent requests are then made directly to the service +`$GIT_URL/git-upload-pack`. (This works the same for git-receive-pack). + + Capability Advertisement +-------------------------- + +A server which decides to communicate (based on a request from a client) +using protocol version 2, notifies the client by sending a version string +in its initial response followed by an advertisement of its capabilities. +Each capability is a key with an optional value. Clients must ignore all +unknown keys. Semantics of unknown values are left to the definition of +each key. Some capabilities will describe commands which can be requested +to be executed by the client. + + capability-advertisement = protocol-version + capability-list + flush-pkt + + protocol-version = PKT-LINE("version 2" LF) + capability-list = *capability + capability = PKT-LINE(key[=value] LF) + + key = 1*(ALPHA | DIGIT | "-_") + value = 1*(ALPHA | DIGIT | " -_.,?\/{}[]()<>!@#$%^&*+=:;") + + Command Request +----------------- + +After receiving the capability advertisement, a client can then issue a +request to select the command it wants with any particular capabilities +or arguments. There is then an optional section where the client can +provide any command specific parameters or queries. Only a single +command can be requested at a time. + + request = empty-request | command-request + empty-request = flush-pkt + command-request = command + capability-list + [command-args] + flush-pkt + command = PKT-LINE("command=" key LF) + command-args = delim-pkt + *command-specific-arg + + command-specific-args are packet line framed arguments defined by + each individual command. + +The server will then check to ensure that the client's request is +comprised of a valid command as well as valid capabilities which were +advertised. If the request is valid the server will then execute the +command. A server MUST wait till it has received the client's entire +request before issuing a response. The format of the response is +determined by the command being executed, but in all cases a flush-pkt +indicates the end of the response. + +When a command has finished, and the client has received the entire +response from the server, a client can either request that another +command be executed or can terminate the connection. A client may +optionally send an empty request consisting of just a flush-pkt to +indicate that no more requests will be made. + + Capabilities +-------------- + +There are two different types of capabilities: normal capabilities, +which can be used to to convey information or alter the behavior of a +request, and commands, which are the core actions that a client wants to +perform (fetch, push, etc). + +Protocol version 2 is stateless by default. This means that all commands +must only last a single round and be stateless from the perspective of the +server side, unless the client has requested a capability indicating that +state should be maintained by the server. Clients MUST NOT require state +management on the server side in order to function correctly. This +permits simple round-robin load-balancing on the server side, without +needing to worry about state management. + + agent +~~~~~~~ + +The server can advertise the `agent` capability with a value `X` (in the +form `agent=X`) to notify the client that the server is running version +`X`. The client may optionally send its own agent string by including +the `agent` capability with a value `Y` (in the form `agent=Y`) in its +request to the server (but it MUST NOT do so if the server did not +advertise the agent capability). The `X` and `Y` strings may contain any +printable ASCII characters except space (i.e., the byte range 32 < x < +127), and are typically of the form "package/version" (e.g., +"git/1.8.3.1"). The agent strings are purely informative for statistics +and debugging purposes, and MUST NOT be used to programmatically assume +the presence or absence of particular features. + + ls-refs +~~~~~~~~~ + +`ls-refs` is the command used to request a reference advertisement in v2. +Unlike the current reference advertisement, ls-refs takes in arguments +which can be used to limit the refs sent from the server. + +Additional features not supported in the base command will be advertised +as the value of the command in the capability advertisement in the form +of a space separated list of features: "<command>=<feature 1> <feature 2>" + +ls-refs takes in the following arguments: + + symrefs + In addition to the object pointed by it, show the underlying ref + pointed by it when showing a symbolic ref. + peel + Show peeled tags. + ref-prefix <prefix> + When specified, only references having a prefix matching one of + the provided prefixes are displayed. + +The output of ls-refs is as follows: + + output = *ref + flush-pkt + ref = PKT-LINE(obj-id SP refname *(SP ref-attribute) LF) + ref-attribute = (symref | peeled) + symref = "symref-target:" symref-target + peeled = "peeled:" obj-id + + fetch +~~~~~~~ + +`fetch` is the command used to fetch a packfile in v2. It can be looked +at as a modified version of the v1 fetch where the ref-advertisement is +stripped out (since the `ls-refs` command fills that role) and the +message format is tweaked to eliminate redundancies and permit easy +addition of future extensions. + +Additional features not supported in the base command will be advertised +as the value of the command in the capability advertisement in the form +of a space separated list of features: "<command>=<feature 1> <feature 2>" + +A `fetch` request can take the following arguments: + + want <oid> + Indicates to the server an object which the client wants to + retrieve. Wants can be anything and are not limited to + advertised objects. + + have <oid> + Indicates to the server an object which the client has locally. + This allows the server to make a packfile which only contains + the objects that the client needs. Multiple 'have' lines can be + supplied. + + done + Indicates to the server that negotiation should terminate (or + not even begin if performing a clone) and that the server should + use the information supplied in the request to construct the + packfile. + + thin-pack + Request that a thin pack be sent, which is a pack with deltas + which reference base objects not contained within the pack (but + are known to exist at the receiving end). This can reduce the + network traffic significantly, but it requires the receiving end + to know how to "thicken" these packs by adding the missing bases + to the pack. + + no-progress + Request that progress information that would normally be sent on + side-band channel 2, during the packfile transfer, should not be + sent. However, the side-band channel 3 is still used for error + responses. + + include-tag + Request that annotated tags should be sent if the objects they + point to are being sent. + + ofs-delta + Indicate that the client understands PACKv2 with delta referring + to its base by position in pack rather than by an oid. That is, + they can read OBJ_OFS_DELTA (ake type 6) in a packfile. + +If the 'shallow' feature is advertised the following arguments can be +included in the clients request as well as the potential addition of the +'shallow-info' section in the server's response as explained below. + + shallow <oid> + A client must notify the server of all commits for which it only + has shallow copies (meaning that it doesn't have the parents of + a commit) by supplying a 'shallow <oid>' line for each such + object so that the server is aware of the limitations of the + client's history. This is so that the server is aware that the + client may not have all objects reachable from such commits. + + deepen <depth> + Requests that the fetch/clone should be shallow having a commit + depth of <depth> relative to the remote side. + + deepen-relative + Requests that the semantics of the "deepen" command be changed + to indicate that the depth requested is relative to the client's + current shallow boundary, instead of relative to the requested + commits. + + deepen-since <timestamp> + Requests that the shallow clone/fetch should be cut at a + specific time, instead of depth. Internally it's equivalent to + doing "git rev-list --max-age=<timestamp>". Cannot be used with + "deepen". + + deepen-not <rev> + Requests that the shallow clone/fetch should be cut at a + specific revision specified by '<rev>', instead of a depth. + Internally it's equivalent of doing "git rev-list --not <rev>". + Cannot be used with "deepen", but can be used with + "deepen-since". + +The response of `fetch` is broken into a number of sections separated by +delimiter packets (0001), with each section beginning with its section +header. + + output = *section + section = (acknowledgments | shallow-info | packfile) + (flush-pkt | delim-pkt) + + acknowledgments = PKT-LINE("acknowledgments" LF) + (nak | *ack) + (ready) + ready = PKT-LINE("ready" LF) + nak = PKT-LINE("NAK" LF) + ack = PKT-LINE("ACK" SP obj-id LF) + + shallow-info = PKT-LINE("shallow-info" LF) + *PKT-LINE((shallow | unshallow) LF) + shallow = "shallow" SP obj-id + unshallow = "unshallow" SP obj-id + + packfile = PKT-LINE("packfile" LF) + *PKT-LINE(%x01-03 *%x00-ff) + + acknowledgments section + * If the client determines that it is finished with negotiations + by sending a "done" line, the acknowledgments sections MUST be + omitted from the server's response. + + * Always begins with the section header "acknowledgments" + + * The server will respond with "NAK" if none of the object ids sent + as have lines were common. + + * The server will respond with "ACK obj-id" for all of the + object ids sent as have lines which are common. + + * A response cannot have both "ACK" lines as well as a "NAK" + line. + + * The server will respond with a "ready" line indicating that + the server has found an acceptable common base and is ready to + make and send a packfile (which will be found in the packfile + section of the same response) + + * If the server has found a suitable cut point and has decided + to send a "ready" line, then the server can decide to (as an + optimization) omit any "ACK" lines it would have sent during + its response. This is because the server will have already + determined the objects it plans to send to the client and no + further negotiation is needed. + + shallow-info section + * If the client has requested a shallow fetch/clone, a shallow + client requests a fetch or the server is shallow then the + server's response may include a shallow-info section. The + shallow-info section will be included if (due to one of the + above conditions) the server needs to inform the client of any + shallow boundaries or adjustments to the clients already + existing shallow boundaries. + + * Always begins with the section header "shallow-info" + + * If a positive depth is requested, the server will compute the + set of commits which are no deeper than the desired depth. + + * The server sends a "shallow obj-id" line for each commit whose + parents will not be sent in the following packfile. + + * The server sends an "unshallow obj-id" line for each commit + which the client has indicated is shallow, but is no longer + shallow as a result of the fetch (due to its parents being + sent in the following packfile). + + * The server MUST NOT send any "unshallow" lines for anything + which the client has not indicated was shallow as a part of + its request. + + * This section is only included if a packfile section is also + included in the response. + + packfile section + * This section is only included if the client has sent 'want' + lines in its request and either requested that no more + negotiation be done by sending 'done' or if the server has + decided it has found a sufficient cut point to produce a + packfile. + + * Always begins with the section header "packfile" + + * The transmission of the packfile begins immediately after the + section header + + * The data transfer of the packfile is always multiplexed, using + the same semantics of the 'side-band-64k' capability from + protocol version 1. This means that each packet, during the + packfile data stream, is made up of a leading 4-byte pkt-line + length (typical of the pkt-line format), followed by a 1-byte + stream code, followed by the actual data. + + The stream code can be one of: + 1 - pack data + 2 - progress messages + 3 - fatal error message just before stream aborts @@ -652,7 +652,6 @@ PROGRAM_OBJS += imap-send.o PROGRAM_OBJS += sh-i18n--envsubst.o PROGRAM_OBJS += shell.o PROGRAM_OBJS += show-index.o -PROGRAM_OBJS += upload-pack.o PROGRAM_OBJS += remote-testsvn.o # Binary suffix, set to .exe for Windows builds @@ -701,6 +700,7 @@ TEST_PROGRAMS_NEED_X += test-dump-untracked-cache TEST_PROGRAMS_NEED_X += test-fake-ssh TEST_PROGRAMS_NEED_X += test-line-buffer TEST_PROGRAMS_NEED_X += test-parse-options +TEST_PROGRAMS_NEED_X += test-pkt-line TEST_PROGRAMS_NEED_X += test-svn-fe TEST_PROGRAMS_NEED_X += test-tool @@ -842,6 +842,7 @@ LIB_OBJS += list-objects-filter-options.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o +LIB_OBJS += ls-refs.o LIB_OBJS += mailinfo.o LIB_OBJS += mailmap.o LIB_OBJS += match-trees.o @@ -898,6 +899,7 @@ LIB_OBJS += revision.o LIB_OBJS += run-command.o LIB_OBJS += send-pack.o LIB_OBJS += sequencer.o +LIB_OBJS += serve.o LIB_OBJS += server-info.o LIB_OBJS += setup.o LIB_OBJS += sha1-array.o @@ -926,6 +928,7 @@ LIB_OBJS += tree-diff.o LIB_OBJS += tree.o LIB_OBJS += tree-walk.o LIB_OBJS += unpack-trees.o +LIB_OBJS += upload-pack.o LIB_OBJS += url.o LIB_OBJS += urlmatch.o LIB_OBJS += usage.o @@ -1030,6 +1033,7 @@ BUILTIN_OBJS += builtin/rev-parse.o BUILTIN_OBJS += builtin/revert.o BUILTIN_OBJS += builtin/rm.o BUILTIN_OBJS += builtin/send-pack.o +BUILTIN_OBJS += builtin/serve.o BUILTIN_OBJS += builtin/shortlog.o BUILTIN_OBJS += builtin/show-branch.o BUILTIN_OBJS += builtin/show-ref.o @@ -1043,6 +1047,7 @@ BUILTIN_OBJS += builtin/update-index.o BUILTIN_OBJS += builtin/update-ref.o BUILTIN_OBJS += builtin/update-server-info.o BUILTIN_OBJS += builtin/upload-archive.o +BUILTIN_OBJS += builtin/upload-pack.o BUILTIN_OBJS += builtin/var.o BUILTIN_OBJS += builtin/verify-commit.o BUILTIN_OBJS += builtin/verify-pack.o @@ -215,6 +215,7 @@ extern int cmd_rev_parse(int argc, const char **argv, const char *prefix); extern int cmd_revert(int argc, const char **argv, const char *prefix); extern int cmd_rm(int argc, const char **argv, const char *prefix); extern int cmd_send_pack(int argc, const char **argv, const char *prefix); +extern int cmd_serve(int argc, const char **argv, const char *prefix); extern int cmd_shortlog(int argc, const char **argv, const char *prefix); extern int cmd_show(int argc, const char **argv, const char *prefix); extern int cmd_show_branch(int argc, const char **argv, const char *prefix); @@ -231,6 +232,7 @@ extern int cmd_update_ref(int argc, const char **argv, const char *prefix); extern int cmd_update_server_info(int argc, const char **argv, const char *prefix); extern int cmd_upload_archive(int argc, const char **argv, const char *prefix); extern int cmd_upload_archive_writer(int argc, const char **argv, const char *prefix); +extern int cmd_upload_pack(int argc, const char **argv, const char *prefix); extern int cmd_var(int argc, const char **argv, const char *prefix); extern int cmd_verify_commit(int argc, const char **argv, const char *prefix); extern int cmd_verify_tag(int argc, const char **argv, const char *prefix); diff --git a/builtin/clone.c b/builtin/clone.c index 7df5932b85..84f1473d19 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1135,7 +1135,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; - refs = transport_get_remote_refs(transport); + refs = transport_get_remote_refs(transport, NULL); if (refs) { mapped_refs = wanted_peer_refs(refs, refspec); diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index a7bc1366ab..1a1bc63566 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -4,6 +4,7 @@ #include "remote.h" #include "connect.h" #include "sha1-array.h" +#include "protocol.h" static const char fetch_pack_usage[] = "git fetch-pack [--all] [--stdin] [--quiet | -q] [--keep | -k] [--thin] " @@ -52,6 +53,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) struct fetch_pack_args args; struct oid_array shallow = OID_ARRAY_INIT; struct string_list deepen_not = STRING_LIST_INIT_DUP; + struct packet_reader reader; fetch_if_missing = 0; @@ -211,10 +213,24 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) if (!conn) return args.diag_url ? 0 : 1; } - get_remote_heads(fd[0], NULL, 0, &ref, 0, NULL, &shallow); + + packet_reader_init(&reader, fd[0], NULL, 0, + PACKET_READ_CHOMP_NEWLINE | + PACKET_READ_GENTLE_ON_EOF); + + switch (discover_version(&reader)) { + case protocol_v2: + die("support for protocol v2 not implemented yet"); + case protocol_v1: + case protocol_v0: + get_remote_heads(&reader, &ref, 0, NULL, &shallow); + break; + case protocol_unknown_version: + BUG("unknown protocol version"); + } ref = fetch_pack(&args, fd, conn, ref, dest, sought, nr_sought, - &shallow, pack_lockfile_ptr); + &shallow, pack_lockfile_ptr, protocol_v0); if (pack_lockfile) { printf("lock %s\n", pack_lockfile); fflush(stdout); diff --git a/builtin/fetch.c b/builtin/fetch.c index dcdfc66f09..7ee83ac0f8 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -264,7 +264,7 @@ static void find_non_local_tags(struct transport *transport, struct string_list_item *item = NULL; for_each_ref(add_existing, &existing_refs); - for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) { + for (ref = transport_get_remote_refs(transport, NULL); ref; ref = ref->next) { if (!starts_with(ref->name, "refs/tags/")) continue; @@ -346,11 +346,28 @@ static struct ref *get_ref_map(struct transport *transport, struct ref *rm; struct ref *ref_map = NULL; |