From a2b26ffb1a81aa23dd14453f4db05d8fe24ee7cc Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 18 Apr 2020 20:52:34 -0700 Subject: fsck: convert gitmodules url to URL passed to curl In 07259e74ec1 (fsck: detect gitmodules URLs with embedded newlines, 2020-03-11), git fsck learned to check whether URLs in .gitmodules could be understood by the credential machinery when they are handled by git-remote-curl. However, the check is overbroad: it checks all URLs instead of only URLs that would be passed to git-remote-curl. In principle a git:// or file:/// URL does not need to follow the same conventions as an http:// URL; in particular, git:// and file:// protocols are not succeptible to issues in the credential API because they do not support attaching credentials. In the HTTP case, the URL in .gitmodules does not always match the URL that would be passed to git-remote-curl and the credential machinery: Git's URL syntax allows specifying a remote helper followed by a "::" delimiter and a URL to be passed to it, so that git ls-remote http::https://example.com/repo.git invokes git-remote-http with https://example.com/repo.git as its URL argument. With today's checks, that distinction does not make a difference, but for a check we are about to introduce (for empty URL schemes) it will matter. .gitmodules files also support relative URLs. To ensure coverage for the https based embedded-newline attack, urldecode and check them directly for embedded newlines. Helped-by: Jeff King Signed-off-by: Jonathan Nieder Reviewed-by: Jeff King --- fsck.c | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 5 deletions(-) (limited to 'fsck.c') diff --git a/fsck.c b/fsck.c index 5b437c260c..4e3bc8622f 100644 --- a/fsck.c +++ b/fsck.c @@ -7,6 +7,7 @@ #include "tag.h" #include "fsck.h" #include "refs.h" +#include "url.h" #include "utf8.h" #include "sha1-array.h" #include "decorate.h" @@ -942,17 +943,100 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +/* + * Like builtin/submodule--helper.c's starts_with_dot_slash, but without + * relying on the platform-dependent is_dir_sep helper. + * + * This is for use in checking whether a submodule URL is interpreted as + * relative to the current directory on any platform, since \ is a + * directory separator on Windows but not on other platforms. + */ +static int starts_with_dot_slash(const char *str) +{ + return str[0] == '.' && (str[1] == '/' || str[1] == '\\'); +} + +/* + * Like starts_with_dot_slash, this is a variant of submodule--helper's + * helper of the same name with the twist that it accepts backslash as a + * directory separator even on non-Windows platforms. + */ +static int starts_with_dot_dot_slash(const char *str) +{ + return str[0] == '.' && starts_with_dot_slash(str + 1); +} + +static int submodule_url_is_relative(const char *url) +{ + return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); +} + +/* + * Check whether a transport is implemented by git-remote-curl. + * + * If it is, returns 1 and writes the URL that would be passed to + * git-remote-curl to the "out" parameter. + * + * Otherwise, returns 0 and leaves "out" untouched. + * + * Examples: + * http::https://example.com/repo.git -> 1, https://example.com/repo.git + * https://example.com/repo.git -> 1, https://example.com/repo.git + * git://example.com/repo.git -> 0 + * + * This is for use in checking for previously exploitable bugs that + * required a submodule URL to be passed to git-remote-curl. + */ +static int url_to_curl_url(const char *url, const char **out) +{ + /* + * We don't need to check for case-aliases, "http.exe", and so + * on because in the default configuration, is_transport_allowed + * prevents URLs with those schemes from being cloned + * automatically. + */ + if (skip_prefix(url, "http::", out) || + skip_prefix(url, "https::", out) || + skip_prefix(url, "ftp::", out) || + skip_prefix(url, "ftps::", out)) + return 1; + if (starts_with(url, "http://") || + starts_with(url, "https://") || + starts_with(url, "ftp://") || + starts_with(url, "ftps://")) { + *out = url; + return 1; + } + return 0; +} + static int check_submodule_url(const char *url) { - struct credential c = CREDENTIAL_INIT; - int ret; + const char *curl_url; if (looks_like_command_line_option(url)) return -1; - ret = credential_from_url_gently(&c, url, 1); - credential_clear(&c); - return ret; + if (submodule_url_is_relative(url)) { + /* + * This could be appended to an http URL and url-decoded; + * check for malicious characters. + */ + char *decoded = url_decode(url); + int has_nl = !!strchr(decoded, '\n'); + free(decoded); + if (has_nl) + return -1; + } + + else if (url_to_curl_url(url, &curl_url)) { + struct credential c = CREDENTIAL_INIT; + int ret = credential_from_url_gently(&c, curl_url, 1); + credential_clear(&c); + return ret; + } + + return 0; } struct fsck_gitmodules_data { -- cgit v1.2.3 From c44088ecc4b0722636e0a305f9608d3047197282 Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 18 Apr 2020 20:54:13 -0700 Subject: credential: treat URL without scheme as invalid libcurl permits making requests without a URL scheme specified. In this case, it guesses the URL from the hostname, so I can run git ls-remote http::ftp.example.com/path/to/repo and it would make an FTP request. Any user intentionally using such a URL is likely to have made a typo. Unfortunately, credential_from_url is not able to determine the host and protocol in order to determine appropriate credentials to send, and until "credential: refuse to operate when missing host or protocol", this resulted in another host's credentials being leaked to the named host. Teach credential_from_url_gently to consider such a URL to be invalid so that fsck can detect and block gitmodules files with such URLs, allowing server operators to avoid serving them to downstream users running older versions of Git. This also means that when such URLs are passed on the command line, Git will print a clearer error so affected users can switch to the simpler URL that explicitly specifies the host and protocol they intend. One subtlety: .gitmodules files can contain relative URLs, representing a URL relative to the URL they were cloned from. The relative URL resolver used for .gitmodules can follow ".." components out of the path part and past the host part of a URL, meaning that such a relative URL can be used to traverse from a https://foo.example.com/innocent superproject to a https::attacker.example.com/exploit submodule. Fortunately a leading ':' in the first path component after a series of leading './' and '../' components is unlikely to show up in other contexts, so we can catch this by detecting that pattern. Reported-by: Jeff King Signed-off-by: Jonathan Nieder Reviewed-by: Jeff King --- fsck.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) (limited to 'fsck.c') diff --git a/fsck.c b/fsck.c index 4e3bc8622f..41af5c0d5f 100644 --- a/fsck.c +++ b/fsck.c @@ -971,6 +971,34 @@ static int submodule_url_is_relative(const char *url) return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url); } +/* + * Count directory components that a relative submodule URL should chop + * from the remote_url it is to be resolved against. + * + * In other words, this counts "../" components at the start of a + * submodule URL. + * + * Returns the number of directory components to chop and writes a + * pointer to the next character of url after all leading "./" and + * "../" components to out. + */ +static int count_leading_dotdots(const char *url, const char **out) +{ + int result = 0; + while (1) { + if (starts_with_dot_dot_slash(url)) { + result++; + url += strlen("../"); + continue; + } + if (starts_with_dot_slash(url)) { + url += strlen("./"); + continue; + } + *out = url; + return result; + } +} /* * Check whether a transport is implemented by git-remote-curl. * @@ -1018,15 +1046,30 @@ static int check_submodule_url(const char *url) return -1; if (submodule_url_is_relative(url)) { + char *decoded; + const char *next; + int has_nl; + /* * This could be appended to an http URL and url-decoded; * check for malicious characters. */ - char *decoded = url_decode(url); - int has_nl = !!strchr(decoded, '\n'); + decoded = url_decode(url); + has_nl = !!strchr(decoded, '\n'); + free(decoded); if (has_nl) return -1; + + /* + * URLs which escape their root via "../" can overwrite + * the host field and previous components, resolving to + * URLs like https::example.com/submodule.git that were + * susceptible to CVE-2020-11008. + */ + if (count_leading_dotdots(url, &next) > 0 && + *next == ':') + return -1; } else if (url_to_curl_url(url, &curl_url)) { -- cgit v1.2.3 From 1a3609e402a062ef7b11f197fe96c28cabca132c Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Sat, 18 Apr 2020 20:57:22 -0700 Subject: fsck: reject URL with empty host in .gitmodules Git's URL parser interprets https:///example.com/repo.git to have no host and a path of "example.com/repo.git". Curl, on the other hand, internally redirects it to https://example.com/repo.git. As a result, until "credential: parse URL without host as empty host, not unset", tricking a user into fetching from such a URL would cause Git to send credentials for another host to example.com. Teach fsck to block and detect .gitmodules files using such a URL to prevent sharing them with Git versions that are not yet protected. A relative URL in a .gitmodules file could also be used to trigger this. The relative URL resolver used for .gitmodules does not normalize sequences of slashes and can follow ".." components out of the path part and to the host part of a URL, meaning that such a relative URL can be used to traverse from a https://foo.example.com/innocent superproject to a https:///attacker.example.com/exploit submodule. Fortunately, redundant extra slashes in .gitmodules are rare, so we can catch this by detecting one after a leading sequence of "./" and "../" components. Helped-by: Jeff King Signed-off-by: Jonathan Nieder Reviewed-by: Jeff King --- fsck.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fsck.c') diff --git a/fsck.c b/fsck.c index 41af5c0d5f..31b5be05f5 100644 --- a/fsck.c +++ b/fsck.c @@ -1064,17 +1064,21 @@ static int check_submodule_url(const char *url) /* * URLs which escape their root via "../" can overwrite * the host field and previous components, resolving to - * URLs like https::example.com/submodule.git that were + * URLs like https::example.com/submodule.git and + * https:///example.com/submodule.git that were * susceptible to CVE-2020-11008. */ if (count_leading_dotdots(url, &next) > 0 && - *next == ':') + (*next == ':' || *next == '/')) return -1; } else if (url_to_curl_url(url, &curl_url)) { struct credential c = CREDENTIAL_INIT; - int ret = credential_from_url_gently(&c, curl_url, 1); + int ret = 0; + if (credential_from_url_gently(&c, curl_url, 1) || + !*c.host) + ret = -1; credential_clear(&c); return ret; } -- cgit v1.2.3