diff options
Diffstat (limited to 'http-walker.c')
-rw-r--r-- | http-walker.c | 534 |
1 files changed, 87 insertions, 447 deletions
diff --git a/http-walker.c b/http-walker.c index 0dbad3c888..700bc13112 100644 --- a/http-walker.c +++ b/http-walker.c @@ -1,12 +1,8 @@ #include "cache.h" #include "commit.h" -#include "pack.h" #include "walker.h" #include "http.h" -#define PREV_BUF_SIZE 4096 -#define RANGE_HEADER_SIZE 30 - struct alt_base { char *base; @@ -27,20 +23,8 @@ struct object_request struct walker *walker; unsigned char sha1[20]; struct alt_base *repo; - char *url; - char filename[PATH_MAX]; - char tmpfile[PATH_MAX]; - int local; enum object_request_state state; - CURLcode curl_result; - char errorstr[CURL_ERROR_SIZE]; - long http_code; - unsigned char real_sha1[20]; - git_SHA_CTX c; - z_stream stream; - int zret; - int rename; - struct active_request_slot *slot; + struct http_object_request *req; struct object_request *next; }; @@ -57,39 +41,10 @@ struct walker_data { const char *url; int got_alternates; struct alt_base *alt; - struct curl_slist *no_pragma_header; }; static struct object_request *object_queue_head; -static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb, - void *data) -{ - unsigned char expn[4096]; - size_t size = eltsize * nmemb; - int posn = 0; - struct object_request *obj_req = (struct object_request *)data; - do { - ssize_t retval = xwrite(obj_req->local, - (char *) ptr + posn, size - posn); - if (retval < 0) - return posn; - posn += retval; - } while (posn < size); - - obj_req->stream.avail_in = size; - obj_req->stream.next_in = ptr; - do { - obj_req->stream.next_out = expn; - obj_req->stream.avail_out = sizeof(expn); - obj_req->zret = git_inflate(&obj_req->stream, Z_SYNC_FLUSH); - git_SHA1_Update(&obj_req->c, expn, - sizeof(expn) - obj_req->stream.avail_out); - } while (obj_req->stream.avail_in && obj_req->zret == Z_OK); - data_received++; - return size; -} - static void fetch_alternates(struct walker *walker, const char *base); static void process_object_response(void *callback_data); @@ -97,166 +52,35 @@ static void process_object_response(void *callback_data); static void start_object_request(struct walker *walker, struct object_request *obj_req) { - char *hex = sha1_to_hex(obj_req->sha1); - char prevfile[PATH_MAX]; - char *url; - char *posn; - int prevlocal; - unsigned char prev_buf[PREV_BUF_SIZE]; - ssize_t prev_read = 0; - long prev_posn = 0; - char range[RANGE_HEADER_SIZE]; - struct curl_slist *range_header = NULL; struct active_request_slot *slot; - struct walker_data *data = walker->data; + struct http_object_request *req; - snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename); - unlink(prevfile); - rename(obj_req->tmpfile, prevfile); - unlink(obj_req->tmpfile); - - if (obj_req->local != -1) - error("fd leakage in start: %d", obj_req->local); - obj_req->local = open(obj_req->tmpfile, - O_WRONLY | O_CREAT | O_EXCL, 0666); - /* This could have failed due to the "lazy directory creation"; - * try to mkdir the last path component. - */ - if (obj_req->local < 0 && errno == ENOENT) { - char *dir = strrchr(obj_req->tmpfile, '/'); - if (dir) { - *dir = 0; - mkdir(obj_req->tmpfile, 0777); - *dir = '/'; - } - obj_req->local = open(obj_req->tmpfile, - O_WRONLY | O_CREAT | O_EXCL, 0666); - } - - if (obj_req->local < 0) { + req = new_http_object_request(obj_req->repo->base, obj_req->sha1); + if (req == NULL) { obj_req->state = ABORTED; - error("Couldn't create temporary file %s for %s: %s", - obj_req->tmpfile, obj_req->filename, strerror(errno)); return; } + obj_req->req = req; - memset(&obj_req->stream, 0, sizeof(obj_req->stream)); - - git_inflate_init(&obj_req->stream); - - git_SHA1_Init(&obj_req->c); - - url = xmalloc(strlen(obj_req->repo->base) + 51); - obj_req->url = xmalloc(strlen(obj_req->repo->base) + 51); - strcpy(url, obj_req->repo->base); - posn = url + strlen(obj_req->repo->base); - strcpy(posn, "/objects/"); - posn += 9; - memcpy(posn, hex, 2); - posn += 2; - *(posn++) = '/'; - strcpy(posn, hex + 2); - strcpy(obj_req->url, url); - - /* If a previous temp file is present, process what was already - fetched. */ - prevlocal = open(prevfile, O_RDONLY); - if (prevlocal != -1) { - do { - prev_read = xread(prevlocal, prev_buf, PREV_BUF_SIZE); - if (prev_read>0) { - if (fwrite_sha1_file(prev_buf, - 1, - prev_read, - obj_req) == prev_read) { - prev_posn += prev_read; - } else { - prev_read = -1; - } - } - } while (prev_read > 0); - close(prevlocal); - } - unlink(prevfile); - - /* Reset inflate/SHA1 if there was an error reading the previous temp - file; also rewind to the beginning of the local file. */ - if (prev_read == -1) { - memset(&obj_req->stream, 0, sizeof(obj_req->stream)); - git_inflate_init(&obj_req->stream); - git_SHA1_Init(&obj_req->c); - if (prev_posn>0) { - prev_posn = 0; - lseek(obj_req->local, 0, SEEK_SET); - ftruncate(obj_req->local, 0); - } - } - - slot = get_active_slot(); + slot = req->slot; slot->callback_func = process_object_response; slot->callback_data = obj_req; - obj_req->slot = slot; - - curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req); - curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file); - curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr); - curl_easy_setopt(slot->curl, CURLOPT_URL, url); - curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, data->no_pragma_header); - - /* If we have successfully processed data from a previous fetch - attempt, only fetch the data we don't already have. */ - if (prev_posn>0) { - if (walker->get_verbosely) - fprintf(stderr, - "Resuming fetch of object %s at byte %ld\n", - hex, prev_posn); - sprintf(range, "Range: bytes=%ld-", prev_posn); - range_header = curl_slist_append(range_header, range); - curl_easy_setopt(slot->curl, - CURLOPT_HTTPHEADER, range_header); - } /* Try to get the request started, abort the request on error */ obj_req->state = ACTIVE; if (!start_active_slot(slot)) { obj_req->state = ABORTED; - obj_req->slot = NULL; - close(obj_req->local); obj_req->local = -1; - free(obj_req->url); + release_http_object_request(req); return; } } static void finish_object_request(struct object_request *obj_req) { - struct stat st; - - fchmod(obj_req->local, 0444); - close(obj_req->local); obj_req->local = -1; - - if (obj_req->http_code == 416) { - fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n"); - } else if (obj_req->curl_result != CURLE_OK) { - if (stat(obj_req->tmpfile, &st) == 0) - if (st.st_size == 0) - unlink(obj_req->tmpfile); - return; - } - - git_inflate_end(&obj_req->stream); - git_SHA1_Final(obj_req->real_sha1, &obj_req->c); - if (obj_req->zret != Z_STREAM_END) { - unlink(obj_req->tmpfile); + if (finish_http_object_request(obj_req->req)) return; - } - if (hashcmp(obj_req->sha1, obj_req->real_sha1)) { - unlink(obj_req->tmpfile); - return; - } - obj_req->rename = - move_temp_to_file(obj_req->tmpfile, obj_req->filename); - if (obj_req->rename == 0) + if (obj_req->req->rename == 0) walker_say(obj_req->walker, "got %s\n", sha1_to_hex(obj_req->sha1)); } @@ -268,19 +92,16 @@ static void process_object_response(void *callback_data) struct walker_data *data = walker->data; struct alt_base *alt = data->alt; - obj_req->curl_result = obj_req->slot->curl_result; - obj_req->http_code = obj_req->slot->http_code; - obj_req->slot = NULL; + process_http_object_request(obj_req->req); obj_req->state = COMPLETE; /* Use alternates if necessary */ - if (missing_target(obj_req)) { + if (missing_target(obj_req->req)) { fetch_alternates(walker, alt->base); if (obj_req->repo->next != NULL) { obj_req->repo = obj_req->repo->next; - close(obj_req->local); - obj_req->local = -1; + release_http_object_request(obj_req->req); start_object_request(walker, obj_req); return; } @@ -293,8 +114,8 @@ static void release_object_request(struct object_request *obj_req) { struct object_request *entry = object_queue_head; - if (obj_req->local != -1) - error("fd leakage in release: %d", obj_req->local); + if (obj_req->req !=NULL && obj_req->req->localfile != -1) + error("fd leakage in release: %d", obj_req->req->localfile); if (obj_req == object_queue_head) { object_queue_head = obj_req->next; } else { @@ -304,7 +125,6 @@ static void release_object_request(struct object_request *obj_req) entry->next = entry->next->next; } - free(obj_req->url); free(obj_req); } @@ -332,28 +152,23 @@ static void prefetch(struct walker *walker, unsigned char *sha1) struct object_request *newreq; struct object_request *tail; struct walker_data *data = walker->data; - char *filename = sha1_file_name(sha1); newreq = xmalloc(sizeof(*newreq)); newreq->walker = walker; hashcpy(newreq->sha1, sha1); newreq->repo = data->alt; - newreq->url = NULL; - newreq->local = -1; newreq->state = WAITING; - snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename); - snprintf(newreq->tmpfile, sizeof(newreq->tmpfile), - "%s.temp", filename); - newreq->slot = NULL; + newreq->req = NULL; newreq->next = NULL; + http_is_verbose = walker->get_verbosely; + if (object_queue_head == NULL) { object_queue_head = newreq; } else { tail = object_queue_head; - while (tail->next != NULL) { + while (tail->next != NULL) tail = tail->next; - } tail->next = newreq; } @@ -363,92 +178,6 @@ static void prefetch(struct walker *walker, unsigned char *sha1) #endif } -static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned char *sha1) -{ - char *hex = sha1_to_hex(sha1); - char *filename; - char *url; - char tmpfile[PATH_MAX]; - long prev_posn = 0; - char range[RANGE_HEADER_SIZE]; - struct curl_slist *range_header = NULL; - struct walker_data *data = walker->data; - - FILE *indexfile; - struct active_request_slot *slot; - struct slot_results results; - - if (has_pack_index(sha1)) - return 0; - - if (walker->get_verbosely) - fprintf(stderr, "Getting index for pack %s\n", hex); - - url = xmalloc(strlen(repo->base) + 64); - sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex); - - filename = sha1_pack_index_name(sha1); - snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename); - indexfile = fopen(tmpfile, "a"); - if (!indexfile) - return error("Unable to open local file %s for pack index", - tmpfile); - - slot = get_active_slot(); - slot->results = &results; - curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile); - curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite); - curl_easy_setopt(slot->curl, CURLOPT_URL, url); - curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, data->no_pragma_header); - slot->local = indexfile; - - /* If there is data present from a previous transfer attempt, - resume where it left off */ - prev_posn = ftell(indexfile); - if (prev_posn>0) { - if (walker->get_verbosely) - fprintf(stderr, - "Resuming fetch of index for pack %s at byte %ld\n", - hex, prev_posn); - sprintf(range, "Range: bytes=%ld-", prev_posn); - range_header = curl_slist_append(range_header, range); - curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header); - } - - if (start_active_slot(slot)) { - run_active_slot(slot); - if (results.curl_result != CURLE_OK) { - fclose(indexfile); - return error("Unable to get pack index %s\n%s", url, - curl_errorstr); - } - } else { - fclose(indexfile); - return error("Unable to start request"); - } - - fclose(indexfile); - - return move_temp_to_file(tmpfile, filename); -} - -static int setup_index(struct walker *walker, struct alt_base *repo, unsigned char *sha1) -{ - struct packed_git *new_pack; - if (has_pack_file(sha1)) - return 0; /* don't list this as something we can get */ - - if (fetch_index(walker, repo, sha1)) - return -1; - - new_pack = parse_pack_index(sha1); - if (!new_pack) - return -1; /* parse_pack_index() already issued error message */ - new_pack->next = repo->packs; - repo->packs = new_pack; - return 0; -} - static void process_alternates_response(void *callback_data) { struct alternates_request *alt_req = @@ -505,7 +234,8 @@ static void process_alternates_response(void *callback_data) struct alt_base *newalt; char *target = NULL; if (data[i] == '/') { - /* This counts + /* + * This counts * http://git.host/pub/scm/linux.git/ * -----------here^ * so memcpy(dst, base, serverlen) will @@ -518,7 +248,8 @@ static void process_alternates_response(void *callback_data) okay = 1; } } else if (!memcmp(data + i, "../", 3)) { - /* Relative URL; chop the corresponding + /* + * Relative URL; chop the corresponding * number of subpath from base (and ../ * from data), and concatenate the result. * @@ -547,7 +278,7 @@ static void process_alternates_response(void *callback_data) } /* If the server got removed, give up. */ okay = strchr(base, ':') - base + 3 < - serverlen; + serverlen; } else if (alt_req->http_specific) { char *colon = strchr(data + i, ':'); char *slash = strchr(data + i, '/'); @@ -591,9 +322,11 @@ static void fetch_alternates(struct walker *walker, const char *base) struct alternates_request alt_req; struct walker_data *cdata = walker->data; - /* If another request has already started fetching alternates, - wait for them to arrive and return to processing this request's - curl message */ + /* + * If another request has already started fetching alternates, + * wait for them to arrive and return to processing this request's + * curl message + */ #ifdef USE_CURL_MULTI while (cdata->got_alternates == 0) { step_active_slots(); @@ -613,8 +346,10 @@ static void fetch_alternates(struct walker *walker, const char *base) url = xmalloc(strlen(base) + 31); sprintf(url, "%s/objects/info/http-alternates", base); - /* Use a callback to process the result, since another request - may fail and need to have alternates loaded before continuing */ + /* + * Use a callback to process the result, since another request + * may fail and need to have alternates loaded before continuing + */ slot = get_active_slot(); slot->callback_func = process_alternates_response; alt_req.walker = walker; @@ -641,15 +376,7 @@ static void fetch_alternates(struct walker *walker, const char *base) static int fetch_indices(struct walker *walker, struct alt_base *repo) { - unsigned char sha1[20]; - char *url; - struct strbuf buffer = STRBUF_INIT; - char *data; - int i = 0; - int ret = 0; - - struct active_request_slot *slot; - struct slot_results results; + int ret; if (repo->got_indices) return 0; @@ -657,76 +384,26 @@ static int fetch_indices(struct walker *walker, struct alt_base *repo) if (walker->get_verbosely) fprintf(stderr, "Getting pack list for %s\n", repo->base); - url = xmalloc(strlen(repo->base) + 21); - sprintf(url, "%s/objects/info/packs", repo->base); - - slot = get_active_slot(); - slot->results = &results; - curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer); - curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer); - curl_easy_setopt(slot->curl, CURLOPT_URL, url); - curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL); - if (start_active_slot(slot)) { - run_active_slot(slot); - if (results.curl_result != CURLE_OK) { - if (missing_target(&results)) { - repo->got_indices = 1; - goto cleanup; - } else { - repo->got_indices = 0; - ret = error("%s", curl_errorstr); - goto cleanup; - } - } - } else { + switch (http_get_info_packs(repo->base, &repo->packs)) { + case HTTP_OK: + case HTTP_MISSING_TARGET: + repo->got_indices = 1; + ret = 0; + break; + default: repo->got_indices = 0; - ret = error("Unable to start request"); - goto cleanup; + ret = -1; } - data = buffer.buf; - while (i < buffer.len) { - switch (data[i]) { - case 'P': - i++; - if (i + 52 <= buffer.len && - !prefixcmp(data + i, " pack-") && - !prefixcmp(data + i + 46, ".pack\n")) { - get_sha1_hex(data + i + 6, sha1); - setup_index(walker, repo, sha1); - i += 51; - break; - } - default: - while (i < buffer.len && data[i] != '\n') - i++; - } - i++; - } - - repo->got_indices = 1; -cleanup: - strbuf_release(&buffer); - free(url); return ret; } static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned char *sha1) { - char *url; struct packed_git *target; - struct packed_git **lst; - FILE *packfile; - char *filename; - char tmpfile[PATH_MAX]; int ret; - long prev_posn = 0; - char range[RANGE_HEADER_SIZE]; - struct curl_slist *range_header = NULL; - struct walker_data *data = walker->data; - - struct active_request_slot *slot; struct slot_results results; + struct http_pack_request *preq; if (fetch_indices(walker, repo)) return -1; @@ -741,80 +418,37 @@ static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned cha sha1_to_hex(sha1)); } - url = xmalloc(strlen(repo->base) + 65); - sprintf(url, "%s/objects/pack/pack-%s.pack", - repo->base, sha1_to_hex(target->sha1)); + preq = new_http_pack_request(target, repo->base); + if (preq == NULL) + goto abort; + preq->lst = &repo->packs; + preq->slot->results = &results; - filename = sha1_pack_name(target->sha1); - snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename); - packfile = fopen(tmpfile, "a"); - if (!packfile) - return error("Unable to open local file %s for pack", - tmpfile); - - slot = get_active_slot(); - slot->results = &results; - curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile); - curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite); - curl_easy_setopt(slot->curl, CURLOPT_URL, url); - curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, data->no_pragma_header); - slot->local = packfile; - - /* If there is data present from a previous transfer attempt, - resume where it left off */ - prev_posn = ftell(packfile); - if (prev_posn>0) { - if (walker->get_verbosely) - fprintf(stderr, - "Resuming fetch of pack %s at byte %ld\n", - sha1_to_hex(target->sha1), prev_posn); - sprintf(range, "Range: bytes=%ld-", prev_posn); - range_header = curl_slist_append(range_header, range); - curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header); - } - - if (start_active_slot(slot)) { - run_active_slot(slot); + if (start_active_slot(preq->slot)) { + run_active_slot(preq->slot); if (results.curl_result != CURLE_OK) { - fclose(packfile); - return error("Unable to get pack file %s\n%s", url, - curl_errorstr); + error("Unable to get pack file %s\n%s", preq->url, + curl_errorstr); + goto abort; } } else { - fclose(packfile); - return error("Unable to start request"); + error("Unable to start request"); + goto abort; } - target->pack_size = ftell(packfile); - fclose(packfile); - - ret = move_temp_to_file(tmpfile, filename); + ret = finish_http_pack_request(preq); + release_http_pack_request(preq); if (ret) return ret; - lst = &repo->packs; - while (*lst != target) - lst = &((*lst)->next); - *lst = (*lst)->next; - - if (verify_pack(target)) - return -1; - install_packed_git(target); - return 0; + +abort: + return -1; } static void abort_object_request(struct object_request *obj_req) { - if (obj_req->local >= 0) { - close(obj_req->local); - obj_req->local = -1; - } - unlink(obj_req->tmpfile); - if (obj_req->slot) { - release_active_slot(obj_req->slot); - obj_req->slot = NULL; - } release_object_request(obj_req); } @@ -823,6 +457,7 @@ static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned c char *hex = sha1_to_hex(sha1); int ret = 0; struct object_request *obj_req = object_queue_head; + struct http_object_request *req; while (obj_req != NULL && hashcmp(obj_req->sha1, sha1)) obj_req = obj_req->next; @@ -830,45 +465,55 @@ static int fetch_object(struct walker *walker, struct alt_base *repo, unsigned c return error("Couldn't find request for %s in the queue", hex); if (has_sha1_file(obj_req->sha1)) { + if (obj_req->req != NULL) + abort_http_object_request(obj_req->req); abort_object_request(obj_req); return 0; } #ifdef USE_CURL_MULTI - while (obj_req->state == WAITING) { + while (obj_req->state == WAITING) step_active_slots(); - } #else start_object_request(walker, obj_req); #endif - while (obj_req->state == ACTIVE) { - run_active_slot(obj_req->slot); - } - if (obj_req->local != -1) { - close(obj_req->local); obj_req->local = -1; + /* + * obj_req->req might change when fetching alternates in the callback + * process_object_response; therefore, the "shortcut" variable, req, + * is used only after we're done with slots. + */ + while (obj_req->state == ACTIVE) + run_active_slot(obj_req->req->slot); + + req = obj_req->req; + + if (req->localfile != -1) { + close(req->localfile); + req->localfile = -1; } if (obj_req->state == ABORTED) { ret = error("Request for %s aborted", hex); - } else if (obj_req->curl_result != CURLE_OK && - obj_req->http_code != 416) { - if (missing_target(obj_req)) + } else if (req->curl_result != CURLE_OK && + req->http_code != 416) { + if (missing_target(req)) ret = -1; /* Be silent, it is probably in a pack. */ else ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)", - obj_req->errorstr, obj_req->curl_result, - obj_req->http_code, hex); - } else if (obj_req->zret != Z_STREAM_END) { + req->errorstr, req->curl_result, + req->http_code, hex); + } else if (req->zret != Z_STREAM_END) { walker->corrupt_object_found++; - ret = error("File %s (%s) corrupt", hex, obj_req->url); - } else if (hashcmp(obj_req->sha1, obj_req->real_sha1)) { + ret = error("File %s (%s) corrupt", hex, req->url); + } else if (hashcmp(obj_req->sha1, req->real_sha1)) { ret = error("File %s has bad hash", hex); - } else if (obj_req->rename < 0) { + } else if (req->rename < 0) { ret = error("unable to write sha1 filename %s", - obj_req->filename); + req->filename); } + release_http_object_request(req); release_object_request(obj_req); return ret; } @@ -898,10 +543,7 @@ static int fetch_ref(struct walker *walker, struct ref *ref) static void cleanup(struct walker *walker) { - struct walker_data *data = walker->data; http_cleanup(); - - curl_slist_free_all(data->no_pragma_header); } struct walker *get_http_walker(const char *url, struct remote *remote) @@ -912,8 +554,6 @@ struct walker *get_http_walker(const char *url, struct remote *remote) http_init(remote); - data->no_pragma_header = curl_slist_append(NULL, "Pragma:"); - data->alt = xmalloc(sizeof(*data->alt)); data->alt->base = xmalloc(strlen(url) + 1); strcpy(data->alt->base, url); |