From e2a0ccc01fab1770bd3b2767481137c6f728e5ab Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:28:50 -0400 Subject: test-lib: preserve GIT_CURL_VERBOSE from the environment Turning on this variable can be useful when debugging http tests. It does break a few tests in t5541, but it is not a variable that the user is likely to have enabled accidentally. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/test-lib.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/t/test-lib.sh b/t/test-lib.sh index c081668dfe..f7e55b1001 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -91,6 +91,7 @@ unset VISUAL EMAIL LANGUAGE COLUMNS $("$PERL_PATH" -e ' VALGRIND UNZIP PERF_ + CURL_VERBOSE )); my @vars = grep(/^GIT_/ && !/^GIT_($ok)/o, @env); print join("\n", @vars); -- cgit v1.2.3 From c7db2d16474874b21aad9141e6856f836f719643 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:28:56 -0400 Subject: t/lib-httpd: use write_script to copy CGI scripts Using write_script will set our shebang line appropriately with $SHELL_PATH. The script that is there now is quite simple and likely to succeed even with a non-POSIX /bin/sh, but it does not hurt to be defensive. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/lib-httpd.sh | 6 +++++- t/lib-httpd/broken-smart-http.sh | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) mode change 100755 => 100644 t/lib-httpd/broken-smart-http.sh diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 252cbf163b..8e680efbf0 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -105,10 +105,14 @@ else "Could not identify web server at '$LIB_HTTPD_PATH'" fi +install_script () { + write_script "$HTTPD_ROOT_PATH/$1" <"$TEST_PATH/$1" +} + prepare_httpd() { mkdir -p "$HTTPD_DOCUMENT_ROOT_PATH" cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH" - cp "$TEST_PATH"/broken-smart-http.sh "$HTTPD_ROOT_PATH" + install_script broken-smart-http.sh ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules" diff --git a/t/lib-httpd/broken-smart-http.sh b/t/lib-httpd/broken-smart-http.sh old mode 100755 new mode 100644 index f7ebfffa80..82cc610b0a --- a/t/lib-httpd/broken-smart-http.sh +++ b/t/lib-httpd/broken-smart-http.sh @@ -1,4 +1,3 @@ -#!/bin/sh printf "Content-Type: text/%s\n" "html" echo printf "%s\n" "001e# service=git-upload-pack" -- cgit v1.2.3 From dbcf2bd3dec1244fdbafb3ec7312ed14d83c0025 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:29:03 -0400 Subject: t5550: test display of remote http error messages Since commit 426e70d (remote-curl: show server content on http errors, 2013-04-05), we relay any text/plain error messages from the remote server to the user. However, we never tested it. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- t/lib-httpd.sh | 1 + t/lib-httpd/apache.conf | 4 ++++ t/lib-httpd/error.sh | 17 +++++++++++++++++ t/t5550-http-fetch-dumb.sh | 10 ++++++++++ 4 files changed, 32 insertions(+) create mode 100755 t/lib-httpd/error.sh diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 8e680efbf0..f7640bee9a 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -113,6 +113,7 @@ prepare_httpd() { mkdir -p "$HTTPD_DOCUMENT_ROOT_PATH" cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH" install_script broken-smart-http.sh + install_script error.sh ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules" diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf index 3a03e8263d..b384d79935 100644 --- a/t/lib-httpd/apache.conf +++ b/t/lib-httpd/apache.conf @@ -97,12 +97,16 @@ Alias /auth/dumb/ www/auth/dumb/ ScriptAliasMatch /smart_*[^/]*/(.*) ${GIT_EXEC_PATH}/git-http-backend/$1 ScriptAlias /broken_smart/ broken-smart-http.sh/ +ScriptAlias /error/ error.sh/ Options FollowSymlinks Options ExecCGI + + Options ExecCGI + Options ExecCGI diff --git a/t/lib-httpd/error.sh b/t/lib-httpd/error.sh new file mode 100755 index 0000000000..786f2816bb --- /dev/null +++ b/t/lib-httpd/error.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +printf "Status: 500 Intentional Breakage\n" + +printf "Content-Type: " +case "$PATH_INFO" in +*html*) + printf "text/html" + ;; +*text*) + printf "text/plain" + ;; +esac +printf "\n" + +printf "\n" +printf "this is the error message\n" diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 1a3a2b6c1a..13defd3886 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -171,5 +171,15 @@ test_expect_success 'did not use upload-pack service' ' test_cmp exp act ' +test_expect_success 'git client shows text/plain errors' ' + test_must_fail git clone "$HTTPD_URL/error/text" 2>stderr && + grep "this is the error message" stderr +' + +test_expect_success 'git client does not show html errors' ' + test_must_fail git clone "$HTTPD_URL/error/html" 2>stderr && + ! grep "this is the error message" stderr +' + stop_httpd test_done -- cgit v1.2.3 From bf197fd7eebcb3579dd659af35822ce88adc66c8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:29:47 -0400 Subject: http: extract type/subtype portion of content-type When we get a content-type from curl, we get the whole header line, including any parameters, and without any normalization (like downcasing or whitespace) applied. If we later try to match it with strcmp() or even strcasecmp(), we may get false negatives. This could cause two visible behaviors: 1. We might fail to recognize a smart-http server by its content-type. 2. We might fail to relay text/plain error messages to users (especially if they contain a charset parameter). This patch teaches the http code to extract and normalize just the type/subtype portion of the string. This is technically passing out less information to the callers, who can no longer see the parameters. But none of the current callers cares, and a future patch will add back an easier-to-use method for accessing those parameters. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http.c | 38 +++++++++++++++++++++++++++++++++++--- remote-curl.c | 2 +- t/lib-httpd/error.sh | 8 +++++++- t/t5550-http-fetch-dumb.sh | 5 +++++ 4 files changed, 48 insertions(+), 5 deletions(-) diff --git a/http.c b/http.c index 94e1afdee7..6bfd0934b3 100644 --- a/http.c +++ b/http.c @@ -906,6 +906,35 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf) return ret; } +/* + * Extract a normalized version of the content type, with any + * spaces suppressed, all letters lowercased, and no trailing ";" + * or parameters. + * + * Note that we will silently remove even invalid whitespace. For + * example, "text / plain" is specifically forbidden by RFC 2616, + * but "text/plain" is the only reasonable output, and this keeps + * our code simple. + * + * Example: + * "TEXT/PLAIN; charset=utf-8" -> "text/plain" + * "text / plain" -> "text/plain" + */ +static void extract_content_type(struct strbuf *raw, struct strbuf *type) +{ + const char *p; + + strbuf_reset(type); + strbuf_grow(type, raw->len); + for (p = raw->buf; *p; p++) { + if (isspace(*p)) + continue; + if (*p == ';') + break; + strbuf_addch(type, tolower(*p)); + } +} + /* http_request() targets */ #define HTTP_REQUEST_STRBUF 0 #define HTTP_REQUEST_FILE 1 @@ -957,9 +986,12 @@ static int http_request(const char *url, ret = run_one_slot(slot, &results); - if (options && options->content_type) - curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, - options->content_type); + if (options && options->content_type) { + struct strbuf raw = STRBUF_INIT; + curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &raw); + extract_content_type(&raw, options->content_type); + strbuf_release(&raw); + } if (options && options->effective_url) curlinfo_strbuf(slot->curl, CURLINFO_EFFECTIVE_URL, diff --git a/remote-curl.c b/remote-curl.c index 52c2d96ce6..a5ab977306 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -205,7 +205,7 @@ static int show_http_message(struct strbuf *type, struct strbuf *msg) * TODO should handle "; charset=XXX", and re-encode into * logoutputencoding */ - if (strcasecmp(type->buf, "text/plain")) + if (strcmp(type->buf, "text/plain")) return -1; strbuf_trim(msg); diff --git a/t/lib-httpd/error.sh b/t/lib-httpd/error.sh index 786f2816bb..23cec97cc3 100755 --- a/t/lib-httpd/error.sh +++ b/t/lib-httpd/error.sh @@ -3,6 +3,7 @@ printf "Status: 500 Intentional Breakage\n" printf "Content-Type: " +charset=iso-8859-1 case "$PATH_INFO" in *html*) printf "text/html" @@ -10,8 +11,13 @@ case "$PATH_INFO" in *text*) printf "text/plain" ;; +*charset*) + printf "text/plain; charset=utf-8" + charset=utf-8 + ;; esac printf "\n" printf "\n" -printf "this is the error message\n" +printf "this is the error message\n" | +iconv -f us-ascii -t $charset diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index 13defd3886..b35b2610e3 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -181,5 +181,10 @@ test_expect_success 'git client does not show html errors' ' ! grep "this is the error message" stderr ' +test_expect_success 'git client shows text/plain with a charset' ' + test_must_fail git clone "$HTTPD_URL/error/charset" 2>stderr && + grep "this is the error message" stderr +' + stop_httpd test_done -- cgit v1.2.3 From e31316263af98c4583be39b469f3152a23eba91d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:30:05 -0400 Subject: http: optionally extract charset parameter from content-type Since the previous commit, we now give a sanitized, shortened version of the content-type header to any callers who ask for it. This patch adds back a way for them to cleanly access specific parameters to the type. We could easily extract all parameters and make them available via a string_list, but: 1. That complicates the interface and memory management. 2. In practice, no planned callers care about anything except the charset. This patch therefore goes with the simplest thing, and we can expand or change the interface later if it becomes necessary. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++---- http.h | 7 +++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/http.c b/http.c index 6bfd0934b3..84463dff3d 100644 --- a/http.c +++ b/http.c @@ -906,6 +906,32 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf) return ret; } +/* + * Check for and extract a content-type parameter. "raw" + * should be positioned at the start of the potential + * parameter, with any whitespace already removed. + * + * "name" is the name of the parameter. The value is appended + * to "out". + */ +static int extract_param(const char *raw, const char *name, + struct strbuf *out) +{ + size_t len = strlen(name); + + if (strncasecmp(raw, name, len)) + return -1; + raw += len; + + if (*raw != '=') + return -1; + raw++; + + while (*raw && !isspace(*raw)) + strbuf_addch(out, *raw++); + return 0; +} + /* * Extract a normalized version of the content type, with any * spaces suppressed, all letters lowercased, and no trailing ";" @@ -916,11 +942,15 @@ static CURLcode curlinfo_strbuf(CURL *curl, CURLINFO info, struct strbuf *buf) * but "text/plain" is the only reasonable output, and this keeps * our code simple. * + * If the "charset" argument is not NULL, store the value of any + * charset parameter there. + * * Example: - * "TEXT/PLAIN; charset=utf-8" -> "text/plain" + * "TEXT/PLAIN; charset=utf-8" -> "text/plain", "utf-8" * "text / plain" -> "text/plain" */ -static void extract_content_type(struct strbuf *raw, struct strbuf *type) +static void extract_content_type(struct strbuf *raw, struct strbuf *type, + struct strbuf *charset) { const char *p; @@ -929,10 +959,25 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type) for (p = raw->buf; *p; p++) { if (isspace(*p)) continue; - if (*p == ';') + if (*p == ';') { + p++; break; + } strbuf_addch(type, tolower(*p)); } + + if (!charset) + return; + + strbuf_reset(charset); + while (*p) { + while (isspace(*p)) + p++; + if (!extract_param(p, "charset", charset)) + return; + while (*p && !isspace(*p)) + p++; + } } /* http_request() targets */ @@ -989,7 +1034,8 @@ static int http_request(const char *url, if (options && options->content_type) { struct strbuf raw = STRBUF_INIT; curlinfo_strbuf(slot->curl, CURLINFO_CONTENT_TYPE, &raw); - extract_content_type(&raw, options->content_type); + extract_content_type(&raw, options->content_type, + options->charset); strbuf_release(&raw); } diff --git a/http.h b/http.h index e64084fe6d..473179b14d 100644 --- a/http.h +++ b/http.h @@ -143,6 +143,13 @@ struct http_get_options { /* If non-NULL, returns the content-type of the response. */ struct strbuf *content_type; + /* + * If non-NULL, and content_type above is non-NULL, returns + * the charset parameter from the content-type. If none is + * present, returns an empty string. + */ + struct strbuf *charset; + /* * If non-NULL, returns the URL we ended up at, including any * redirects we followed. -- cgit v1.2.3 From d4241f52d1a19bf464d63cbc4cd67fcc6a3af01d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:30:14 -0400 Subject: strbuf: add strbuf_reencode helper This is a convenience wrapper around `reencode_string_len` and `strbuf_attach`. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- Documentation/technical/api-strbuf.txt | 5 +++++ strbuf.c | 17 +++++++++++++++++ strbuf.h | 1 + 3 files changed, 23 insertions(+) diff --git a/Documentation/technical/api-strbuf.txt b/Documentation/technical/api-strbuf.txt index 3350d97dda..9d28b034ad 100644 --- a/Documentation/technical/api-strbuf.txt +++ b/Documentation/technical/api-strbuf.txt @@ -125,6 +125,11 @@ Functions Strip whitespace from the end of a string. +`strbuf_reencode`:: + + Replace the contents of the strbuf with a reencoded form. Returns -1 + on error, 0 on success. + `strbuf_cmp`:: Compare two buffers. Returns an integer less than, equal to, or greater diff --git a/strbuf.c b/strbuf.c index ee96dcfb81..fc7290f57a 100644 --- a/strbuf.c +++ b/strbuf.c @@ -1,5 +1,6 @@ #include "cache.h" #include "refs.h" +#include "utf8.h" int starts_with(const char *str, const char *prefix) { @@ -106,6 +107,22 @@ void strbuf_ltrim(struct strbuf *sb) sb->buf[sb->len] = '\0'; } +int strbuf_reencode(struct strbuf *sb, const char *from, const char *to) +{ + char *out; + int len; + + if (same_encoding(from, to)) + return 0; + + out = reencode_string_len(sb->buf, sb->len, to, from, &len); + if (!out) + return -1; + + strbuf_attach(sb, out, len, len); + return 0; +} + struct strbuf **strbuf_split_buf(const char *str, size_t slen, int terminator, int max) { diff --git a/strbuf.h b/strbuf.h index 39c14cfa38..4e9a2f8868 100644 --- a/strbuf.h +++ b/strbuf.h @@ -45,6 +45,7 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len) extern void strbuf_trim(struct strbuf *); extern void strbuf_rtrim(struct strbuf *); extern void strbuf_ltrim(struct strbuf *); +extern int strbuf_reencode(struct strbuf *sb, const char *from, const char *to); extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); /* -- cgit v1.2.3 From fc1b774c72990f0ff92370316412b19fd72baa77 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:30:29 -0400 Subject: remote-curl: reencode http error messages We currently recognize an error message with a content-type "text/plain; charset=utf-16" as text, but we ignore the charset parameter entirely. Let's encode it to log_output_encoding, which is presumably something the user's terminal can handle. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- remote-curl.c | 17 ++++++++++------- t/lib-httpd/error.sh | 4 ++++ t/t5550-http-fetch-dumb.sh | 5 +++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/remote-curl.c b/remote-curl.c index a5ab977306..4493b389de 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -194,19 +194,19 @@ static void free_discovery(struct discovery *d) } } -static int show_http_message(struct strbuf *type, struct strbuf *msg) +static int show_http_message(struct strbuf *type, struct strbuf *charset, + struct strbuf *msg) { const char *p, *eol; /* * We only show text/plain parts, as other types are likely * to be ugly to look at on the user's terminal. - * - * TODO should handle "; charset=XXX", and re-encode into - * logoutputencoding */ if (strcmp(type->buf, "text/plain")) return -1; + if (charset->len) + strbuf_reencode(msg, charset->buf, get_log_output_encoding()); strbuf_trim(msg); if (!msg->len) @@ -225,6 +225,7 @@ static struct discovery* discover_refs(const char *service, int for_push) { struct strbuf exp = STRBUF_INIT; struct strbuf type = STRBUF_INIT; + struct strbuf charset = STRBUF_INIT; struct strbuf buffer = STRBUF_INIT; struct strbuf refs_url = STRBUF_INIT; struct strbuf effective_url = STRBUF_INIT; @@ -249,6 +250,7 @@ static struct discovery* discover_refs(const char *service, int for_push) memset(&options, 0, sizeof(options)); options.content_type = &type; + options.charset = &charset; options.effective_url = &effective_url; options.base_url = &url; options.no_cache = 1; @@ -259,13 +261,13 @@ static struct discovery* discover_refs(const char *service, int for_push) case HTTP_OK: break; case HTTP_MISSING_TARGET: - show_http_message(&type, &buffer); + show_http_message(&type, &charset, &buffer); die("repository '%s' not found", url.buf); case HTTP_NOAUTH: - show_http_message(&type, &buffer); + show_http_message(&type, &charset, &buffer); die("Authentication failed for '%s'", url.buf); default: - show_http_message(&type, &buffer); + show_http_message(&type, &charset, &buffer); die("unable to access '%s': %s", url.buf, curl_errorstr); } @@ -310,6 +312,7 @@ static struct discovery* discover_refs(const char *service, int for_push) strbuf_release(&refs_url); strbuf_release(&exp); strbuf_release(&type); + strbuf_release(&charset); strbuf_release(&effective_url); strbuf_release(&buffer); last_discovery = last; diff --git a/t/lib-httpd/error.sh b/t/lib-httpd/error.sh index 23cec97cc3..eafc9d2d90 100755 --- a/t/lib-httpd/error.sh +++ b/t/lib-httpd/error.sh @@ -15,6 +15,10 @@ case "$PATH_INFO" in printf "text/plain; charset=utf-8" charset=utf-8 ;; +*utf16*) + printf "text/plain; charset=utf-16" + charset=utf-16 + ;; esac printf "\n" diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh index b35b2610e3..01b8aae2ed 100755 --- a/t/t5550-http-fetch-dumb.sh +++ b/t/t5550-http-fetch-dumb.sh @@ -186,5 +186,10 @@ test_expect_success 'git client shows text/plain with a charset' ' grep "this is the error message" stderr ' +test_expect_success 'http error messages are reencoded' ' + test_must_fail git clone "$HTTPD_URL/error/utf16" 2>stderr && + grep "this is the error message" stderr +' + stop_httpd test_done -- cgit v1.2.3 From c553fd1c1ef76688b6200e66a825b530b0b02140 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 22 May 2014 05:36:12 -0400 Subject: http: default text charset to iso-8859-1 This is specified by RFC 2616 as the default if no "charset" parameter is given. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- http.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/http.c b/http.c index 84463dff3d..2b4f6a357c 100644 --- a/http.c +++ b/http.c @@ -978,6 +978,9 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type, while (*p && !isspace(*p)) p++; } + + if (!charset->len && starts_with(type->buf, "text/")) + strbuf_addstr(charset, "ISO-8859-1"); } /* http_request() targets */ -- cgit v1.2.3