From cf3c6352100a0d302276e46e3f9a7f0804e224d8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 12 Dec 2016 14:52:22 -0500 Subject: alternates: accept double-quoted paths We read lists of alternates from objects/info/alternates files (delimited by newline), as well as from the GIT_ALTERNATE_OBJECT_DIRECTORIES environment variable (delimited by colon or semi-colon, depending on the platform). There's no mechanism for quoting the delimiters, so it's impossible to specify an alternate path that contains a colon in the environment, or one that contains a newline in a file. We've lived with that restriction for ages because both alternates and filenames with colons are relatively rare, and it's only a problem when the two meet. But since 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03), which builds on the alternates system, every push causes the receiver to set GIT_ALTERNATE_OBJECT_DIRECTORIES internally. It would be convenient to have some way to quote the delimiter so that we can represent arbitrary paths. The simplest thing would be an escape character before a quoted delimiter (e.g., "\:" as a literal colon). But that creates a backwards compatibility problem: any path which uses that escape character is now broken, and we've just shifted the problem. We could choose an unlikely escape character (e.g., something from the non-printable ASCII range), but that's awkward to use. Instead, let's treat names as unquoted unless they begin with a double-quote, in which case they are interpreted via our usual C-style quoting rules. This also breaks backwards-compatibility, but in a smaller way: it only matters if your file has a double-quote as the very _first_ character in the path (whereas an escape character is a problem anywhere in the path). It's also consistent with many other parts of git, which accept either a bare pathname or a double-quoted one, and the sender can choose to quote or not as required. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/git.txt | 6 ++++++ sha1_file.c | 47 ++++++++++++++++++++++++++++++++++++----------- t/t5615-alternate-env.sh | 18 ++++++++++++++++++ 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/Documentation/git.txt b/Documentation/git.txt index b8bec711f4..5f7826bf39 100644 --- a/Documentation/git.txt +++ b/Documentation/git.txt @@ -859,6 +859,12 @@ Git so take care if using a foreign front-end. specifies a ":" separated (on Windows ";" separated) list of Git object directories which can be used to search for Git objects. New objects will not be written to these directories. ++ + Entries that begin with `"` (double-quote) will be interpreted + as C-style quoted paths, removing leading and trailing + double-quotes and respecting backslash escapes. E.g., the value + `"path-with-\"-and-:-in-it":vanilla-path` has two paths: + `path-with-"-and-:-in-it` and `vanilla-path`. `GIT_DIR`:: If the `GIT_DIR` environment variable is set then it diff --git a/sha1_file.c b/sha1_file.c index fbafdbed94..fc6d864fda 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -26,6 +26,7 @@ #include "mru.h" #include "list.h" #include "mergesort.h" +#include "quote.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -329,13 +330,40 @@ static int link_alt_odb_entry(const char *entry, const char *relative_base, return 0; } +static const char *parse_alt_odb_entry(const char *string, + int sep, + struct strbuf *out) +{ + const char *end; + + strbuf_reset(out); + + if (*string == '#') { + /* comment; consume up to next separator */ + end = strchrnul(string, sep); + } else if (*string == '"' && !unquote_c_style(out, string, &end)) { + /* + * quoted path; unquote_c_style has copied the + * data for us and set "end". Broken quoting (e.g., + * an entry that doesn't end with a quote) falls + * back to the unquoted case below. 
+ */ + } else { + /* normal, unquoted path */ + end = strchrnul(string, sep); + strbuf_add(out, string, end - string); + } + + if (*end) + end++; + return end; +} + static void link_alt_odb_entries(const char *alt, int len, int sep, const char *relative_base, int depth) { - struct string_list entries = STRING_LIST_INIT_NODUP; - char *alt_copy; - int i; struct strbuf objdirbuf = STRBUF_INIT; + struct strbuf entry = STRBUF_INIT; if (depth > 5) { error("%s: ignoring alternate object stores, nesting too deep.", @@ -348,16 +376,13 @@ static void link_alt_odb_entries(const char *alt, int len, int sep, die("unable to normalize object directory: %s", objdirbuf.buf); - alt_copy = xmemdupz(alt, len); - string_list_split_in_place(&entries, alt_copy, sep, -1); - for (i = 0; i < entries.nr; i++) { - const char *entry = entries.items[i].string; - if (entry[0] == '\0' || entry[0] == '#') + while (*alt) { + alt = parse_alt_odb_entry(alt, sep, &entry); + if (!entry.len) continue; - link_alt_odb_entry(entry, relative_base, depth, objdirbuf.buf); + link_alt_odb_entry(entry.buf, relative_base, depth, objdirbuf.buf); } - string_list_clear(&entries, 0); - free(alt_copy); + strbuf_release(&entry); strbuf_release(&objdirbuf); } diff --git a/t/t5615-alternate-env.sh b/t/t5615-alternate-env.sh index 22d9d8178b..c33d089980 100755 --- a/t/t5615-alternate-env.sh +++ b/t/t5615-alternate-env.sh @@ -68,4 +68,22 @@ test_expect_success 'access alternate via relative path (subdir)' ' EOF ' +# set variables outside test to avoid quote insanity; the \057 is '/', +# which doesn't need quoting, but just confirms that de-quoting +# is working. 
+quoted='"one.git\057objects"' +unquoted='two.git/objects' +test_expect_success 'mix of quoted and unquoted alternates' ' + check_obj "$quoted:$unquoted" <<-EOF + $one blob + $two blob +' + +test_expect_success 'broken quoting falls back to interpreting raw' ' + mv one.git \"one.git && + check_obj \"one.git/objects <<-EOF + $one blob + EOF +' + test_done -- cgit v1.2.3 From aae2ae4f74f91f434f7f5c3ac25f37d80a9b319e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 12 Dec 2016 14:53:55 -0500 Subject: tmp-objdir: quote paths we add to alternates Commit 722ff7f87 (receive-pack: quarantine objects until pre-receive accepts, 2016-10-03) regressed pushes to repositories with colon (or semi-colon in Windows) in them because it adds the repository's main object directory to GIT_ALTERNATE_OBJECT_DIRECTORIES. The receiver interprets the colon as a delimiter, not as part of the path, and index-pack is unable to find objects which it needs to resolve deltas. The previous commit introduced a quoting mechanism for the alternates list; let's use it here to cover this case. We'll avoid quoting when we can, though. This alternate setup is also used when calling hooks, so it's possible that the user may call older git implementations which don't understand the quoting mechanism. By quoting only when necessary, this setup will continue to work unless the user _also_ has a repository whose path contains the delimiter. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/t5547-push-quarantine.sh | 19 +++++++++++++++++++ tmp-objdir.c | 18 +++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/t/t5547-push-quarantine.sh b/t/t5547-push-quarantine.sh index 1e5d32d068..6275ec807b 100755 --- a/t/t5547-push-quarantine.sh +++ b/t/t5547-push-quarantine.sh @@ -33,4 +33,23 @@ test_expect_success 'rejected objects are removed' ' test_cmp expect actual ' +# MINGW does not allow colons in pathnames in the first place +test_expect_success !MINGW 'push to repo path with colon' ' + # The interesting failure case here is when the + # receiving end cannot access its original object directory, + # so make it likely for us to generate a delta by having + # a non-trivial file with multiple versions. + + test-genrandom foo 4096 >file.bin && + git add file.bin && + git commit -m bin && + git clone --bare . xxx:yyy.git && + + echo change >>file.bin && + git commit -am change && + # Note that we have to use the full path here, or it gets confused + # with the ssh host:path syntax. + git push "$PWD/xxx:yyy.git" HEAD +' + test_done diff --git a/tmp-objdir.c b/tmp-objdir.c index 64435f23a4..b2d9280f10 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -5,6 +5,7 @@ #include "string-list.h" #include "strbuf.h" #include "argv-array.h" +#include "quote.h" struct tmp_objdir { struct strbuf path; @@ -79,12 +80,27 @@ static void remove_tmp_objdir_on_signal(int signo) */ static void env_append(struct argv_array *env, const char *key, const char *val) { - const char *old = getenv(key); + struct strbuf quoted = STRBUF_INIT; + const char *old; + /* + * Avoid quoting if it's not necessary, for maximum compatibility + * with older parsers which don't understand the quoting. 
+ */ + if (*val == '"' || strchr(val, PATH_SEP)) { + strbuf_addch(&quoted, '"'); + quote_c_style(val, &quoted, NULL, 1); + strbuf_addch(&quoted, '"'); + val = quoted.buf; + } + + old = getenv(key); if (!old) argv_array_pushf(env, "%s=%s", key, val); else argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP, val); + + strbuf_release(&quoted); } static void env_replace(struct argv_array *env, const char *key, const char *val) -- cgit v1.2.3 From eaa76de0dff57f13a6fb7f791f3a486a407f5c27 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Tue, 13 Dec 2016 20:09:31 +0100 Subject: t5547-push-quarantine: run the path separator test on Windows, too To perform the test case on Windows in a way that corresponds to the POSIX version, inject the semicolon in a directory name. Typically, an absolute POSIX style path, such as the one in $PWD, is translated into a Windows style path by bash when it invokes git.exe. However, the presence of the semicolon suppresses this translation; but the untranslated POSIX style path is useless for git.exe. Therefore, instead of $PWD pass the Windows style path that $(pwd) produces. 
Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- t/t5547-push-quarantine.sh | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/t/t5547-push-quarantine.sh b/t/t5547-push-quarantine.sh index 6275ec807b..af9fcd833a 100755 --- a/t/t5547-push-quarantine.sh +++ b/t/t5547-push-quarantine.sh @@ -33,8 +33,7 @@ test_expect_success 'rejected objects are removed' ' test_cmp expect actual ' -# MINGW does not allow colons in pathnames in the first place -test_expect_success !MINGW 'push to repo path with colon' ' +test_expect_success 'push to repo path with path separator (colon)' ' # The interesting failure case here is when the # receiving end cannot access its original object directory, # so make it likely for us to generate a delta by having @@ -43,13 +42,20 @@ test_expect_success !MINGW 'push to repo path with colon' ' test-genrandom foo 4096 >file.bin && git add file.bin && git commit -m bin && - git clone --bare . xxx:yyy.git && + + if test_have_prereq MINGW + then + pathsep=";" + else + pathsep=":" + fi && + git clone --bare . "xxx${pathsep}yyy.git" && echo change >>file.bin && git commit -am change && # Note that we have to use the full path here, or it gets confused # with the ssh host:path syntax. - git push "$PWD/xxx:yyy.git" HEAD + git push "$(pwd)/xxx${pathsep}yyy.git" HEAD ' test_done -- cgit v1.2.3 From 5e74824fac646e2ebe335a00bcecd91641a7f7ca Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 21 Dec 2016 22:33:43 +0100 Subject: t5615-alternate-env: double-quotes in file names do not work on Windows Protect a recently added test case with !MINGW. 
Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- t/t5615-alternate-env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t5615-alternate-env.sh b/t/t5615-alternate-env.sh index c33d089980..79628db3ec 100755 --- a/t/t5615-alternate-env.sh +++ b/t/t5615-alternate-env.sh @@ -79,7 +79,7 @@ test_expect_success 'mix of quoted and unquoted alternates' ' $two blob ' -test_expect_success 'broken quoting falls back to interpreting raw' ' +test_expect_success !MINGW 'broken quoting falls back to interpreting raw' ' mv one.git \"one.git && check_obj \"one.git/objects <<-EOF $one blob -- cgit v1.2.3