#include "cache.h"
#include "repository.h"
#include "commit.h"
#include "walker.h"
#include "http.h"
#include "list.h"
#include "transport.h"
#include "packfile.h"
#include "object-store.h"

/*
 * One object store that objects may be fetched from.  The first entry
 * of the list is created from the URL given to get_http_walker();
 * further entries are appended by process_alternates_response().
 */
struct alt_base {
	char *base;			/* base URL; freed in cleanup() */
	int got_indices;		/* non-zero once fetch_indices() succeeded */
	struct packed_git *packs;	/* filled in by http_get_info_packs() */
	struct alt_base *next;
};

enum object_request_state {
	WAITING,	/* queued by prefetch(), not started yet */
	ABORTED,	/* the request could not be created or started */
	ACTIVE,		/* handed to start_active_slot() */
	COMPLETE	/* response processed, or object already present */
};

/* A queued loose-object request; linked into object_queue_head. */
struct object_request {
	struct walker *walker;
	struct object_id oid;
	struct alt_base *repo;		/* store currently being tried */
	enum object_request_state state;
	/*
	 * Underlying HTTP request.  NULL whenever no live request exists
	 * (before the first start, and after the request was released).
	 */
	struct http_object_request *req;
	struct list_head node;
};

/* Context handed to process_alternates_response() via the slot. */
struct alternates_request {
	struct walker *walker;
	const char *base;
	struct strbuf *url;
	struct strbuf *buffer;		/* receives the response body */
	struct active_request_slot *slot;
	int http_specific;		/* 1 while asking for http-alternates */
};

struct walker_data {
	const char *url;
	/*
	 * -1: alternates not fetched (initial value, or the fetch failed)
	 *  0: a fetch of the alternates list is in progress
	 *  1: alternates have been fetched and processed
	 */
	int got_alternates;
	struct alt_base *alt;
};

static LIST_HEAD(object_queue_head);

static void fetch_alternates(struct walker *walker, const char *base);

static void process_object_response(void *callback_data);

/*
 * Create an HTTP request for obj_req against obj_req->repo and try to
 * start it.  On success the state becomes ACTIVE and obj_req->req holds
 * the request.  On failure the state becomes ABORTED and obj_req->req
 * is NULL, so that no caller can touch a released request.
 */
static void start_object_request(struct walker *walker,
				 struct object_request *obj_req)
{
	struct active_request_slot *slot;
	struct http_object_request *req;

	req = new_http_object_request(obj_req->repo->base, &obj_req->oid);
	if (req == NULL) {
		obj_req->req = NULL;
		obj_req->state = ABORTED;
		return;
	}
	obj_req->req = req;

	slot = req->slot;
	slot->callback_func = process_object_response;
	slot->callback_data = obj_req;

	/* Try to get the request started, abort the request on error */
	obj_req->state = ACTIVE;
	if (!start_active_slot(slot)) {
		obj_req->state = ABORTED;
		release_http_object_request(req);
		/* do not leave a pointer to the released request behind */
		obj_req->req = NULL;
		return;
	}
}

/*
 * Finish the HTTP request of obj_req; when that succeeds and the
 * request's rename status is 0, report the object through walker_say().
 */
static void finish_object_request(struct object_request *obj_req)
{
	if (finish_http_object_request(obj_req->req))
		return;

	if (obj_req->req->rename == 0)
		walker_say(obj_req->walker, "got %s\n", oid_to_hex(&obj_req->oid));
}

/*
 * Slot callback for object requests (installed by
 * start_object_request()).  If the target was missing, load the
 * alternates list and retry against the next object store, if any;
 * otherwise finish the request.
 */
static void process_object_response(void *callback_data)
{
	struct object_request *obj_req =
		(struct object_request *)callback_data;
	struct walker *walker = obj_req->walker;
	struct walker_data *data = walker->data;
	struct alt_base *alt = data->alt;

	process_http_object_request(obj_req->req);
	obj_req->state = COMPLETE;

	normalize_curl_result(&obj_req->req->curl_result,
			      obj_req->req->http_code,
			      obj_req->req->errorstr,
			      sizeof(obj_req->req->errorstr));

	/* Use alternates if necessary */
	if (missing_target(obj_req->req)) {
		fetch_alternates(walker, alt->base);
		if (obj_req->repo->next != NULL) {
			obj_req->repo =
				obj_req->repo->next;
			release_http_object_request(obj_req->req);
			/* the old request is gone; the restart installs a new one */
			obj_req->req = NULL;
			start_object_request(walker, obj_req);
			return;
		}
	}

	finish_object_request(obj_req);
}

/*
 * Unlink obj_req from the queue and free it.  Complains if a request
 * is still attached and its local file descriptor is still open.
 */
static void release_object_request(struct object_request *obj_req)
{
	if (obj_req->req !=NULL && obj_req->req->localfile != -1)
		error("fd leakage in release: %d", obj_req->req->localfile);

	list_del(&obj_req->node);
	free(obj_req);
}

/*
 * Fill function registered with add_fill_function(); "data" is the
 * struct walker passed at registration time.  Starts the first WAITING
 * request whose object is not yet present locally and returns 1;
 * requests whose object already exists are marked COMPLETE.  Returns 0
 * when nothing was started.
 *
 * The parameter is a void pointer so that the function has exactly the
 * type the fill-function registry calls it through.
 */
static int fill_active_slot(void *data)
{
	struct walker *walker = data;
	struct object_request *obj_req;
	struct list_head *pos, *tmp, *head = &object_queue_head;

	list_for_each_safe(pos, tmp, head) {
		obj_req = list_entry(pos, struct object_request, node);
		if (obj_req->state == WAITING) {
			if (has_object_file(&obj_req->oid))
				obj_req->state = COMPLETE;
			else {
				start_object_request(walker, obj_req);
				return 1;
			}
		}
	}
	return 0;
}

/*
 * walker->prefetch implementation: queue a WAITING request for the
 * object named by sha1 against the primary object store, then give the
 * HTTP layer a chance to start and advance requests.
 */
static void prefetch(struct walker *walker, unsigned char *sha1)
{
	struct object_request *newreq;
	struct walker_data *data = walker->data;

	newreq = xmalloc(sizeof(*newreq));
	newreq->walker = walker;
	oidread(&newreq->oid, sha1);
	newreq->repo = data->alt;
	newreq->state = WAITING;
	newreq->req = NULL;

	http_is_verbose = walker->get_verbosely;
	list_add_tail(&newreq->node, &object_queue_head);

	fill_active_slots();
	step_active_slots();
}

/*
 * Decide whether an alternate URL may be used.  Returns 1 only if
 * http.followRedirects is "always", the URL starts with one of the
 * known protocols followed by "://", and that protocol passes
 * is_transport_allowed(); otherwise warns and returns 0.
 */
static int is_alternate_allowed(const char *url)
{
	const char *protocols[] = {
		"http", "https", "ftp", "ftps"
	};
	int i;

	if (http_follow_config != HTTP_FOLLOW_ALWAYS) {
		warning("alternate disabled by http.followRedirects: %s", url);
		return 0;
	}

	for (i = 0; i < ARRAY_SIZE(protocols); i++) {
		const char *end;
		if (skip_prefix(url, protocols[i], &end) &&
		    starts_with(end, "://"))
			break;
	}

	if (i >= ARRAY_SIZE(protocols)) {
		warning("ignoring alternate with unknown protocol: %s", url);
		return 0;
	}
	if (!is_transport_allowed(protocols[i], 0)) {
		warning("ignoring alternate with restricted protocol: %s", url);
		return 0;
	}

	return 1;
}

/*
 * Slot callback for the alternates request started by
 * fetch_alternates().
 *
 * If the http-alternates request failed or returned nothing, the slot
 * is reused to request objects/info/alternates instead.  Otherwise the
 * response is parsed line by line, and every acceptable entry is
 * appended to the walker's list of object stores.  The response comes
 * from the remote server, so every offset derived from it is bounded
 * before it is used to index "base".
 */
static void process_alternates_response(void *callback_data)
{
	struct alternates_request *alt_req =
		(struct alternates_request *)callback_data;
	struct walker *walker = alt_req->walker;
	struct walker_data *cdata = walker->data;
	struct active_request_slot *slot = alt_req->slot;
	struct alt_base *tail = cdata->alt;
	const char *base = alt_req->base;
	const char null_byte = '\0';
	char *data;
	int i = 0;

	normalize_curl_result(&slot->curl_result, slot->http_code,
			      curl_errorstr, sizeof(curl_errorstr));

	if (alt_req->http_specific) {
		if (slot->curl_result != CURLE_OK ||
		    !alt_req->buffer->len) {

			/* Try reusing the slot to get non-http alternates */
			alt_req->http_specific = 0;
			strbuf_reset(alt_req->url);
			strbuf_addf(alt_req->url, "%s/objects/info/alternates",
				    base);
			curl_easy_setopt(slot->curl, CURLOPT_URL,
					 alt_req->url->buf);
			active_requests++;
			slot->in_use = 1;
			if (slot->finished != NULL)
				(*slot->finished) = 0;
			if (!start_active_slot(slot)) {
				cdata->got_alternates = -1;
				slot->in_use = 0;
				if (slot->finished != NULL)
					(*slot->finished) = 1;
			}
			return;
		}
	} else if (slot->curl_result != CURLE_OK) {
		if (!missing_target(slot)) {
			cdata->got_alternates = -1;
			return;
		}
	}

	/*
	 * NUL-terminate the buffer without counting the terminator in
	 * its length, so that data[len] is readable below.
	 */
	fwrite_buffer((char *)&null_byte, 1, 1, alt_req->buffer);
	alt_req->buffer->len--;
	data = alt_req->buffer->buf;

	while (i < alt_req->buffer->len) {
		int posn = i;
		while (posn < alt_req->buffer->len && data[posn] != '\n')
			posn++;
		if (data[posn] == '\n') {
			int okay = 0;
			int serverlen = 0;
			struct alt_base *newalt;
			if (data[i] == '/') {
				/*
				 * This counts
				 * http://git.host/pub/scm/linux.git/
				 * -----------here^
				 * so memcpy(dst, base, serverlen) will
				 * copy up to "...git.host".
				 *
				 * If base has no path component at all
				 * (e.g. "http://git.host"), the whole of
				 * base is the server part.
				 */
				const char *colon_ss = strstr(base,"://");
				if (colon_ss) {
					const char *path =
						strchr(colon_ss + 3, '/');
					serverlen = path ?
						(int)(path - base) :
						(int)strlen(base);
					okay = 1;
				}
			} else if (!memcmp(data + i, "../", 3)) {
				/*
				 * Relative URL; chop the corresponding
				 * number of subpath from base (and ../
				 * from data), and concatenate the result.
				 *
				 * The code first drops ../ from data, and
				 * then drops one ../ from data and one path
				 * from base.  IOW, one extra ../ is dropped
				 * from data than path is dropped from base.
				 *
				 * This is not wrong.  The alternate in
				 *     http://git.host/pub/scm/linux.git/
				 * to borrow from
				 *     http://git.host/pub/scm/linus.git/
				 * is ../../linus.git/objects/.  You need
				 * two ../../ to borrow from your direct
				 * neighbour.
				 */
				const char *scheme_end;

				i += 3;
				serverlen = strlen(base);
				while (i + 2 < posn &&
				       !memcmp(data + i, "../", 3)) {
					/*
					 * Never step below zero: more "../"
					 * than base has components must not
					 * index in front of base.
					 */
					if (serverlen > 0) {
						do {
							serverlen--;
						} while (serverlen > 0 &&
							 base[serverlen - 1] != '/');
					}
					i += 3;
				}
				/* If the server got removed, give up. */
				scheme_end = strchr(base, ':');
				okay = scheme_end &&
				       scheme_end - base + 3 < serverlen;
			} else if (alt_req->http_specific) {
				/*
				 * Accept an entry only if it has a ':' that
				 * precedes the first '/', both on this line.
				 */
				char *colon = strchr(data + i, ':');
				char *slash = strchr(data + i, '/');
				if (colon && slash && colon < data + posn &&
				    slash < data + posn && colon < slash) {
					okay = 1;
				}
			}
			if (okay) {
				struct strbuf target = STRBUF_INIT;
				strbuf_add(&target, base, serverlen);
				strbuf_add(&target, data + i, posn - i);
				if (!strbuf_strip_suffix(&target, "objects")) {
					warning("ignoring alternate that does"
						" not end in 'objects': %s",
						target.buf);
					strbuf_release(&target);
				} else if (is_alternate_allowed(target.buf)) {
					warning("adding alternate object store: %s",
						target.buf);
					newalt = xmalloc(sizeof(*newalt));
					newalt->next = NULL;
					newalt->base = strbuf_detach(&target, NULL);
					newalt->got_indices = 0;
					newalt->packs = NULL;

					while (tail->next != NULL)
						tail = tail->next;
					tail->next = newalt;
				} else {
					strbuf_release(&target);
				}
			}
		}
		i = posn + 1;
	}

	cdata->got_alternates = 1;
}

/*
 * Make sure the alternates list for "base" has been requested.  If a
 * fetch is already in progress, wait for it; if the list was already
 * fetched, do nothing; otherwise request objects/info/http-alternates
 * and run the slot, with process_alternates_response() handling the
 * result.
 */
static void fetch_alternates(struct walker *walker, const char *base)
{
	struct strbuf buffer = STRBUF_INIT;
	struct strbuf url = STRBUF_INIT;
	struct active_request_slot *slot;
	struct alternates_request alt_req;
	struct walker_data *cdata = walker->data;

	/*
	 * If another request has already started fetching alternates,
	 * wait for them to arrive and return to processing this request's
	 * curl message
	 */
	while (cdata->got_alternates == 0) {
		step_active_slots();
	}

	/* Nothing to do if they've already been fetched */
	if (cdata->got_alternates == 1)
		return;

	/* Start the fetch */
	cdata->got_alternates = 0;

	if (walker->get_verbosely)
		fprintf(stderr, "Getting alternates list for %s\n", base);

	strbuf_addf(&url, "%s/objects/info/http-alternates", base);

	/*
	 * Use a callback to process the result, since another request
	 * may fail and need to have alternates loaded before continuing
	 */
	slot = get_active_slot();
	slot->callback_func = process_alternates_response;
	alt_req.walker = walker;
	slot->callback_data = &alt_req;

	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
	curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);

	alt_req.base = base;
	alt_req.url = &url;
	alt_req.buffer = &buffer;
	alt_req.http_specific = 1;
	alt_req.slot = slot;

	if (start_active_slot(slot))
		run_active_slot(slot);
	else
		cdata->got_alternates = -1;

	strbuf_release(&buffer);
	strbuf_release(&url);
}

/*
 * Load the pack list of "repo" unless that was already done.  Returns
 * 0 on success (HTTP_MISSING_TARGET counts as success) and -1
 * otherwise.
 */
static int fetch_indices(struct walker *walker, struct alt_base *repo)
{
	int ret;

	if (repo->got_indices)
		return 0;

	if (walker->get_verbosely)
		fprintf(stderr, "Getting pack list for %s\n", repo->base);

	switch (http_get_info_packs(repo->base, &repo->packs)) {
	case HTTP_OK:
	case HTTP_MISSING_TARGET:
		repo->got_indices = 1;
		ret = 0;
		break;
	default:
		repo->got_indices = 0;
		ret = -1;
	}

	return ret;
}

/*
 * Try to obtain the object named by sha1 by downloading the pack of
 * "repo" that contains it.  Returns 0 when the pack was fetched and
 * installed, non-zero otherwise.  The pack request is released on
 * every path once it has been created.
 */
static int http_fetch_pack(struct walker *walker, struct alt_base *repo,
			   unsigned char *sha1)
{
	struct packed_git *target;
	int ret;
	struct slot_results results;
	struct http_pack_request *preq;

	if (fetch_indices(walker, repo))
		return -1;
	target = find_sha1_pack(sha1, repo->packs);
	if (!target)
		return -1;
	close_pack_index(target);

	if (walker->get_verbosely) {
		fprintf(stderr, "Getting pack %s\n",
			hash_to_hex(target->hash));
		fprintf(stderr, " which contains %s\n",
			hash_to_hex(sha1));
	}

	preq = new_http_pack_request(target->hash, repo->base);
	if (preq == NULL)
		return -1;
	preq->slot->results = &results;

	if (start_active_slot(preq->slot)) {
		run_active_slot(preq->slot);
		if (results.curl_result != CURLE_OK) {
			error("Unable to get pack file %s\n%s", preq->url,
			      curl_errorstr);
			goto abort;
		}
	} else {
		error("Unable to start request");
		goto abort;
	}

	ret = finish_http_pack_request(preq);
	release_http_pack_request(preq);
	if (ret)
		return ret;
	http_install_packfile(target, &repo->packs);

	return 0;

abort:
	/* preq is known to be non-NULL here; do not leak it */
	release_http_pack_request(preq);
	return -1;
}

/* Drop a queued object request; see release_object_request(). */
static void abort_object_request(struct object_request *obj_req)
{
	release_object_request(obj_req);
}

/*
 * Wait for the queued request for "hash" (see prefetch()) and report
 * its outcome.  Returns 0 if the object is available afterwards,
 * non-zero otherwise.  The request is removed from the queue in every
 * case except when no matching request was queued.
 */
static int fetch_object(struct walker *walker, unsigned char *hash)
{
	char *hex = hash_to_hex(hash);
	int ret = 0;
	struct object_request *obj_req = NULL;
	struct http_object_request *req;
	struct list_head *pos, *head = &object_queue_head;

	/*
	 * Use a separate cursor so that obj_req stays NULL unless a
	 * matching entry was actually found.
	 */
	list_for_each(pos, head) {
		struct object_request *candidate =
			list_entry(pos, struct object_request, node);
		if (hasheq(candidate->oid.hash, hash)) {
			obj_req = candidate;
			break;
		}
	}
	if (obj_req == NULL)
		return error("Couldn't find request for %s in the queue", hex);

	if (has_object_file(&obj_req->oid)) {
		if (obj_req->req != NULL) {
			abort_http_object_request(obj_req->req);
			/* the request must not be looked at again */
			obj_req->req = NULL;
		}
		abort_object_request(obj_req);
		return 0;
	}

	while (obj_req->state == WAITING)
		step_active_slots();

	/*
	 * obj_req->req might change when fetching alternates in the callback
	 * process_object_response; therefore, the "shortcut" variable, req,
	 * is used only after we're done with slots.
	 */
	while (obj_req->state == ACTIVE)
		run_active_slot(obj_req->req->slot);

	req = obj_req->req;

	if (req == NULL) {
		/*
		 * No live request exists: either it could not be created
		 * or started (ABORTED), or fill_active_slot() found the
		 * object already present and marked the entry COMPLETE
		 * without any transfer.
		 */
		if (obj_req->state == ABORTED)
			ret = error("Request for %s aborted", hex);
		release_object_request(obj_req);
		return ret;
	}

	if (req->localfile != -1) {
		close(req->localfile);
		req->localfile = -1;
	}

	normalize_curl_result(&req->curl_result, req->http_code,
			      req->errorstr, sizeof(req->errorstr));

	if (obj_req->state == ABORTED) {
		ret = error("Request for %s aborted", hex);
	} else if (req->curl_result != CURLE_OK &&
		   req->http_code != 416) {
		if (missing_target(req))
			ret = -1; /* Be silent, it is probably in a pack. */
		else
			ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
				    req->errorstr, req->curl_result,
				    req->http_code, hex);
	} else if (req->zret != Z_STREAM_END) {
		walker->corrupt_object_found++;
		ret = error("File %s (%s) corrupt", hex, req->url);
	} else if (!oideq(&obj_req->oid, &req->real_oid)) {
		ret = error("File %s has bad hash", hex);
	} else if (req->rename < 0) {
		struct strbuf buf = STRBUF_INIT;
		loose_object_path(the_repository, &buf, &req->oid);
		ret = error("unable to write sha1 filename %s", buf.buf);
		strbuf_release(&buf);
	}

	release_http_object_request(req);
	/* release_object_request() inspects obj_req->req; clear it first */
	obj_req->req = NULL;
	release_object_request(obj_req);
	return ret;
}

/*
 * walker->fetch implementation: try the loose object first, then look
 * for a pack containing it in each known object store, loading the
 * alternates list along the way.
 */
static int fetch(struct walker *walker, unsigned char *hash)
{
	struct walker_data *data = walker->data;
	struct alt_base *altbase = data->alt;

	if (!fetch_object(walker, hash))
		return 0;
	while (altbase) {
		if (!http_fetch_pack(walker, altbase, hash))
			return 0;
		fetch_alternates(walker, data->alt->base);
		altbase = altbase->next;
	}
	return error("Unable to find %s under %s", hash_to_hex(hash),
		     data->alt->base);
}

/* walker->fetch_ref implementation: fetch "ref" from the primary store. */
static int fetch_ref(struct walker *walker, struct ref *ref)
{
	struct walker_data *data = walker->data;
	return http_fetch_ref(data->alt->base, ref);
}

/*
 * walker->cleanup implementation: free the list of object stores and
 * the walker's private data.
 */
static void cleanup(struct walker *walker)
{
	struct walker_data *data = walker->data;
	struct alt_base *alt, *alt_next;

	if (data) {
		alt = data->alt;
		while (alt) {
			alt_next = alt->next;

			free(alt->base);
			free(alt);

			alt = alt_next;
		}
		free(data);
		walker->data = NULL;
	}
}

/*
 * Create a walker that fetches over HTTP from "url".  Trailing slashes
 * are removed from the stored base URL.  The returned walker has its
 * fetch, fetch_ref, prefetch and cleanup callbacks set, and is
 * registered as data for the fill function.
 */
struct walker *get_http_walker(const char *url)
{
	size_t len;
	struct walker_data *data = xmalloc(sizeof(struct walker_data));
	struct walker *walker = xmalloc(sizeof(struct walker));

	data->alt = xmalloc(sizeof(*data->alt));
	data->alt->base = xstrdup(url);
	/* bounded by the length so an empty or all-slash URL is safe */
	len = strlen(data->alt->base);
	while (len > 0 && data->alt->base[len - 1] == '/')
		data->alt->base[--len] = 0;

	data->alt->got_indices = 0;
	data->alt->packs = NULL;
	data->alt->next = NULL;
	data->got_alternates = -1;

	walker->corrupt_object_found = 0;
	walker->fetch = fetch;
	walker->fetch_ref = fetch_ref;
	walker->prefetch = prefetch;
	walker->cleanup = cleanup;
	walker->data = data;

	add_fill_function(walker, fill_active_slot);

	return walker;
}