diff options
Diffstat (limited to 'packfile.c')
-rw-r--r-- | packfile.c | 670 |
1 files changed, 444 insertions, 226 deletions
diff --git a/packfile.c b/packfile.c index 4a5fe7ab18..f2850a00b5 100644 --- a/packfile.c +++ b/packfile.c @@ -1,6 +1,7 @@ #include "cache.h" -#include "mru.h" +#include "list.h" #include "pack.h" +#include "repository.h" #include "dir.h" #include "mergesort.h" #include "packfile.h" @@ -8,6 +9,13 @@ #include "list.h" #include "streaming.h" #include "sha1-lookup.h" +#include "commit.h" +#include "object.h" +#include "tag.h" +#include "tree-walk.h" +#include "tree.h" +#include "object-store.h" +#include "midx.h" char *odb_pack_name(struct strbuf *buf, const unsigned char *sha1, @@ -39,8 +47,6 @@ static unsigned int pack_open_fds; static unsigned int pack_max_fds; static size_t peak_pack_mapped; static size_t pack_mapped; -struct packed_git *packed_git; -struct mru packed_git_mru; #define SZ_FMT PRIuMAX static inline uintmax_t sz_fmt(size_t s) { return s; } @@ -74,11 +80,10 @@ void pack_report(void) static int check_packed_git_idx(const char *path, struct packed_git *p) { void *idx_map; - struct pack_idx_header *hdr; size_t idx_size; - uint32_t version, nr, i, *index; - int fd = git_open(path); + int fd = git_open(path), ret; struct stat st; + const unsigned int hashsz = the_hash_algo->rawsz; if (fd < 0) return -1; @@ -87,23 +92,39 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) return -1; } idx_size = xsize_t(st.st_size); - if (idx_size < 4 * 256 + 20 + 20) { + if (idx_size < 4 * 256 + hashsz + hashsz) { close(fd); return error("index file %s is too small", path); } idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - hdr = idx_map; + ret = load_idx(path, hashsz, idx_map, idx_size, p); + + if (ret) + munmap(idx_map, idx_size); + + return ret; +} + +int load_idx(const char *path, const unsigned int hashsz, void *idx_map, + size_t idx_size, struct packed_git *p) +{ + struct pack_idx_header *hdr = idx_map; + uint32_t version, nr, i, *index; + + if (idx_size < 4 * 256 + hashsz + hashsz) + return error("index file %s is too small", path); + if (idx_map == NULL) + return error("empty data"); + if (hdr->idx_signature == htonl(PACK_IDX_SIGNATURE)) { version = ntohl(hdr->idx_version); - if (version < 2 || version > 2) { - munmap(idx_map, idx_size); + if (version < 2 || version > 2) return error("index file %s is version %"PRIu32 " and is not supported by this binary" " (try upgrading GIT to a newer version)", path, version); - } } else version = 1; @@ -113,10 +134,8 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) index += 2; /* skip index header */ for (i = 0; i < 256; i++) { uint32_t n = ntohl(index[i]); - if (n < nr) { - munmap(idx_map, idx_size); + if (n < nr) return error("non-monotonic index %s", path); - } nr = n; } @@ -124,46 +143,40 @@ static int check_packed_git_idx(const char *path, struct packed_git *p) /* * Total size: * - 256 index entries 4 bytes each - * - 24-byte entries * nr (20-byte sha1 + 4-byte offset) - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum + * - 24-byte entries * nr (object ID + 4-byte offset) + * - hash of the packfile + * - file checksum */ - if (idx_size != 4*256 + nr * 24 + 20 + 20) { - munmap(idx_map, idx_size); + if (idx_size != 4 * 256 + nr * (hashsz + 4) + hashsz + hashsz) return error("wrong index v1 file size in %s", path); - } } else if (version == 2) { /* * Minimum size: * - 8 bytes of header * - 256 index entries 4 bytes each - * - 20-byte sha1 entry * nr + * - object ID entry * nr * - 4-byte crc entry * nr * - 4-byte offset entry * nr - * - 20-byte SHA1 of the packfile - * - 20-byte SHA1 file checksum + * - hash of the packfile + * - file checksum * And after the 4-byte offset table might be a * variable sized table containing 8-byte entries * for offsets larger than 2^31. */ - unsigned long min_size = 8 + 4*256 + nr*(20 + 4 + 4) + 20 + 20; + unsigned long min_size = 8 + 4*256 + nr*(hashsz + 4 + 4) + hashsz + hashsz; unsigned long max_size = min_size; if (nr) max_size += (nr - 1)*8; - if (idx_size < min_size || idx_size > max_size) { - munmap(idx_map, idx_size); + if (idx_size < min_size || idx_size > max_size) return error("wrong index v2 file size in %s", path); - } if (idx_size != min_size && /* * make sure we can deal with large pack offsets. * 31-bit signed offset won't be enough, neither * 32-bit unsigned one will be. */ - (sizeof(off_t) <= 4)) { - munmap(idx_map, idx_size); + (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); - } } p->index_version = version; @@ -183,13 +196,30 @@ int open_pack_index(struct packed_git *p) return 0; if (!strip_suffix(p->pack_name, ".pack", &len)) - die("BUG: pack_name does not end in .pack"); + BUG("pack_name does not end in .pack"); idx_name = xstrfmt("%.*s.idx", (int)len, p->pack_name); ret = check_packed_git_idx(idx_name, p); free(idx_name); return ret; } +uint32_t get_pack_fanout(struct packed_git *p, uint32_t value) +{ + const uint32_t *level1_ofs = p->index_data; + + if (!level1_ofs) { + if (open_pack_index(p)) + return 0; + level1_ofs = p->index_data; + } + + if (p->index_version > 1) { + level1_ofs += 2; + } + + return ntohl(level1_ofs[value]); +} + static struct packed_git *alloc_packed_git(int extra) { struct packed_git *p = xmalloc(st_add(sizeof(*p), extra)); @@ -240,7 +270,7 @@ static int unuse_one_window(struct packed_git *current) if (current) scan_windows(current, &lru_p, &lru_w, &lru_l); - for (p = packed_git; p; p = p->next) + for (p = the_repository->objects->packed_git; p; p = p->next) scan_windows(p, &lru_p, &lru_w, &lru_l); if (lru_p) { munmap(lru_w->base, lru_w->len); @@ -299,20 +329,20 @@ void close_pack_index(struct packed_git *p) } } -static void close_pack(struct packed_git *p) +void close_pack(struct packed_git *p) { close_pack_windows(p); close_pack_fd(p); close_pack_index(p); } -void close_all_packs(void) +void close_all_packs(struct raw_object_store *o) { struct packed_git *p; - for (p = packed_git; p; p = p->next) + for (p = o->packed_git; p; p = p->next) if (p->do_not_close) - die("BUG: want to close pack marked 'do-not-close'"); + BUG("want to close pack marked 'do-not-close'"); else close_pack(p); } @@ -378,7 +408,7 @@ static int close_one_pack(void) struct pack_window *mru_w = NULL; int accept_windows_inuse = 1; - for (p = packed_git; p; p = p->next) { + for (p = the_repository->objects->packed_git; p; p = p->next) { if (p->pack_fd == -1) continue; find_lru_pack(p, &lru_p, &mru_w, &accept_windows_inuse); @@ -439,13 +469,25 @@ static int open_packed_git_1(struct packed_git *p) { struct stat st; struct pack_header hdr; - unsigned char sha1[20]; - unsigned char *idx_sha1; + unsigned char hash[GIT_MAX_RAWSZ]; + unsigned char *idx_hash; long fd_flag; ssize_t read_result; + const unsigned hashsz = the_hash_algo->rawsz; + + if (!p->index_data) { + struct multi_pack_index *m; + const char *pack_name = strrchr(p->pack_name, '/'); - if (!p->index_data && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); + for (m = the_repository->objects->multi_pack_index; + m; m = m->next) { + if (midx_contains_pack(m, pack_name)) + break; + } + + if (!m && open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); + } if (!pack_max_fds) { unsigned int max_fds = get_max_fd_limit(); @@ -496,21 +538,25 @@ static int open_packed_git_1(struct packed_git *p) " supported (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); + /* Skip index checking if in multi-pack-index */ + if (!p->index_data) + return 0; + /* Verify the pack matches its index. */ if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %"PRIu32" objects" " while index indicates %"PRIu32" objects", p->pack_name, ntohl(hdr.hdr_entries), p->num_objects); - if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1) + if (lseek(p->pack_fd, p->pack_size - hashsz, SEEK_SET) == -1) return error("end of packfile %s is unavailable", p->pack_name); - read_result = read_in_full(p->pack_fd, sha1, sizeof(sha1)); + read_result = read_in_full(p->pack_fd, hash, hashsz); if (read_result < 0) return error_errno("error reading from %s", p->pack_name); - if (read_result != sizeof(sha1)) + if (read_result != hashsz) return error("packfile %s signature is unavailable", p->pack_name); - idx_sha1 = ((unsigned char *)p->index_data) + p->index_size - 40; - if (hashcmp(sha1, idx_sha1)) + idx_hash = ((unsigned char *)p->index_data) + p->index_size - hashsz * 2; + if (!hasheq(hash, idx_hash)) return error("packfile %s does not match index", p->pack_name); return 0; } @@ -525,7 +571,7 @@ static int open_packed_git(struct packed_git *p) static int in_window(struct pack_window *win, off_t offset) { - /* We must promise at least 20 bytes (one hash) after the + /* We must promise at least one full hash after the * offset is available from this window, otherwise the offset * is not actually in this window and a different window (which * has that one hash excess) must be used. This is to support @@ -533,7 +579,7 @@ static int in_window(struct pack_window *win, off_t offset) */ off_t win_off = win->offset; return win_off <= offset - && (offset + 20) <= (win_off + win->len); + && (offset + the_hash_algo->rawsz) <= (win_off + win->len); } unsigned char *use_pack(struct packed_git *p, @@ -550,7 +596,7 @@ unsigned char *use_pack(struct packed_git *p, */ if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p)) die("packfile %s cannot be accessed", p->pack_name); - if (offset > (p->pack_size - 20)) + if (offset > (p->pack_size - the_hash_algo->rawsz)) die("offset beyond end of packfile (truncated pack?)"); if (offset < 0) die(_("offset before end of packfile (broken .idx?)")); @@ -643,10 +689,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) return NULL; /* - * ".pack" is long enough to hold any suffix we're adding (and + * ".promisor" is long enough to hold any suffix we're adding (and * the use xsnprintf double-checks that) */ - alloc = st_add3(path_len, strlen(".pack"), 1); + alloc = st_add3(path_len, strlen(".promisor"), 1); p = alloc_packed_git(alloc); memcpy(p->pack_name, path, path_len); @@ -654,6 +700,10 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) if (!access(p->pack_name, F_OK)) p->pack_keep = 1; + xsnprintf(p->pack_name + path_len, alloc - path_len, ".promisor"); + if (!access(p->pack_name, F_OK)) + p->pack_promisor = 1; + xsnprintf(p->pack_name + path_len, alloc - path_len, ".pack"); if (stat(p->pack_name, &st) || !S_ISREG(st.st_mode)) { free(p); @@ -666,18 +716,19 @@ struct packed_git *add_packed_git(const char *path, size_t path_len, int local) p->pack_size = st.st_size; p->pack_local = local; p->mtime = st.st_mtime; - if (path_len < 40 || get_sha1_hex(path + path_len - 40, p->sha1)) + if (path_len < the_hash_algo->hexsz || + get_sha1_hex(path + path_len - the_hash_algo->hexsz, p->sha1)) hashclr(p->sha1); return p; } -void install_packed_git(struct packed_git *pack) +void install_packed_git(struct repository *r, struct packed_git *pack) { if (pack->pack_fd != -1) pack_open_fds++; - pack->next = packed_git; - packed_git = pack; + pack->next = r->objects->packed_git; + r->objects->packed_git = pack; } void (*report_garbage)(unsigned seen_bits, const char *path); @@ -726,13 +777,14 @@ static void report_pack_garbage(struct string_list *list) report_helper(list, seen_bits, first, list->nr); } -static void prepare_packed_git_one(char *objdir, int local) +void for_each_file_in_pack_dir(const char *objdir, + each_file_in_pack_dir_fn fn, + void *data) { struct strbuf path = STRBUF_INIT; size_t dirnamelen; DIR *dir; struct dirent *de; - struct string_list garbage = STRING_LIST_INIT_DUP; strbuf_addstr(&path, objdir); strbuf_addstr(&path, "/pack"); @@ -747,53 +799,88 @@ static void prepare_packed_git_one(char *objdir, int local) strbuf_addch(&path, '/'); dirnamelen = path.len; while ((de = readdir(dir)) != NULL) { - struct packed_git *p; - size_t base_len; - if (is_dot_or_dotdot(de->d_name)) continue; strbuf_setlen(&path, dirnamelen); strbuf_addstr(&path, de->d_name); - base_len = path.len; - if (strip_suffix_mem(path.buf, &base_len, ".idx")) { - /* Don't reopen a pack we already have. */ - for (p = packed_git; p; p = p->next) { - size_t len; - if (strip_suffix(p->pack_name, ".pack", &len) && - len == base_len && - !memcmp(p->pack_name, path.buf, len)) - break; - } - if (p == NULL && - /* - * See if it really is a valid .idx file with - * corresponding .pack file that we can map. - */ - (p = add_packed_git(path.buf, path.len, local)) != NULL) - install_packed_git(p); - } - - if (!report_garbage) - continue; - - if (ends_with(de->d_name, ".idx") || - ends_with(de->d_name, ".pack") || - ends_with(de->d_name, ".bitmap") || - ends_with(de->d_name, ".keep")) - string_list_append(&garbage, path.buf); - else - report_garbage(PACKDIR_FILE_GARBAGE, path.buf); + fn(path.buf, path.len, de->d_name, data); } + closedir(dir); - report_pack_garbage(&garbage); - string_list_clear(&garbage, 0); strbuf_release(&path); } -static int approximate_object_count_valid; +struct prepare_pack_data { + struct repository *r; + struct string_list *garbage; + int local; + struct multi_pack_index *m; +}; + +static void prepare_pack(const char *full_name, size_t full_name_len, + const char *file_name, void *_data) +{ + struct prepare_pack_data *data = (struct prepare_pack_data *)_data; + struct packed_git *p; + size_t base_len = full_name_len; + + if (strip_suffix_mem(full_name, &base_len, ".idx") && + !(data->m && midx_contains_pack(data->m, file_name))) { + /* Don't reopen a pack we already have. */ + for (p = data->r->objects->packed_git; p; p = p->next) { + size_t len; + if (strip_suffix(p->pack_name, ".pack", &len) && + len == base_len && + !memcmp(p->pack_name, full_name, len)) + break; + } + + if (!p) { + p = add_packed_git(full_name, full_name_len, data->local); + if (p) + install_packed_git(data->r, p); + } + } + + if (!report_garbage) + return; + + if (!strcmp(file_name, "multi-pack-index")) + return; + if (ends_with(file_name, ".idx") || + ends_with(file_name, ".pack") || + ends_with(file_name, ".bitmap") || + ends_with(file_name, ".keep") || + ends_with(file_name, ".promisor")) + string_list_append(data->garbage, full_name); + else + report_garbage(PACKDIR_FILE_GARBAGE, full_name); +} + +static void prepare_packed_git_one(struct repository *r, char *objdir, int local) +{ + struct prepare_pack_data data; + struct string_list garbage = STRING_LIST_INIT_DUP; + + data.m = r->objects->multi_pack_index; + + /* look for the multi-pack-index for this object directory */ + while (data.m && strcmp(data.m->object_dir, objdir)) + data.m = data.m->next; + + data.r = r; + data.garbage = &garbage; + data.local = local; + for_each_file_in_pack_dir(objdir, prepare_pack, &data); + + report_pack_garbage(data.garbage); + string_list_clear(data.garbage, 0); +} + +static void prepare_packed_git(struct repository *r); /* * Give a fast, rough count of the number of objects in the repository. This * ignores loose objects completely. If you have a lot of them, then either @@ -803,19 +890,23 @@ static int approximate_object_count_valid; */ unsigned long approximate_object_count(void) { - static unsigned long count; - if (!approximate_object_count_valid) { + if (!the_repository->objects->approximate_object_count_valid) { + unsigned long count; + struct multi_pack_index *m; struct packed_git *p; - prepare_packed_git(); + prepare_packed_git(the_repository); count = 0; - for (p = packed_git; p; p = p->next) { + for (m = get_multi_pack_index(the_repository); m; m = m->next) + count += m->num_objects; + for (p = the_repository->objects->packed_git; p; p = p->next) { if (open_pack_index(p)) continue; count += p->num_objects; } + the_repository->objects->approximate_object_count = count; } - return count; + return the_repository->objects->approximate_object_count; } static void *get_next_packed_git(const void *p) @@ -856,42 +947,91 @@ static int sort_pack(const void *a_, const void *b_) return -1; } -static void rearrange_packed_git(void) +static void rearrange_packed_git(struct repository *r) { - packed_git = llist_mergesort(packed_git, get_next_packed_git, - set_next_packed_git, sort_pack); + r->objects->packed_git = llist_mergesort( + r->objects->packed_git, get_next_packed_git, + set_next_packed_git, sort_pack); } -static void prepare_packed_git_mru(void) +static void prepare_packed_git_mru(struct repository *r) { struct packed_git *p; - mru_clear(&packed_git_mru); - for (p = packed_git; p; p = p->next) - mru_append(&packed_git_mru, p); + INIT_LIST_HEAD(&r->objects->packed_git_mru); + + for (p = r->objects->packed_git; p; p = p->next) + list_add_tail(&p->mru, &r->objects->packed_git_mru); } -static int prepare_packed_git_run_once = 0; -void prepare_packed_git(void) +static void prepare_packed_git(struct repository *r) { struct alternate_object_database *alt; - if (prepare_packed_git_run_once) + if (r->objects->packed_git_initialized) return; - prepare_packed_git_one(get_object_directory(), 1); - prepare_alt_odb(); - for (alt = alt_odb_list; alt; alt = alt->next) - prepare_packed_git_one(alt->path, 0); - rearrange_packed_git(); - prepare_packed_git_mru(); - prepare_packed_git_run_once = 1; + prepare_multi_pack_index_one(r, r->objects->objectdir, 1); + prepare_packed_git_one(r, r->objects->objectdir, 1); + prepare_alt_odb(r); + for (alt = r->objects->alt_odb_list; alt; alt = alt->next) { + prepare_multi_pack_index_one(r, alt->path, 0); + prepare_packed_git_one(r, alt->path, 0); + } + rearrange_packed_git(r); + + r->objects->all_packs = NULL; + + prepare_packed_git_mru(r); + r->objects->packed_git_initialized = 1; +} + +void reprepare_packed_git(struct repository *r) +{ + r->objects->approximate_object_count_valid = 0; + r->objects->packed_git_initialized = 0; + prepare_packed_git(r); +} + +struct packed_git *get_packed_git(struct repository *r) +{ + prepare_packed_git(r); + return r->objects->packed_git; +} + +struct multi_pack_index *get_multi_pack_index(struct repository *r) +{ + prepare_packed_git(r); + return r->objects->multi_pack_index; +} + +struct packed_git *get_all_packs(struct repository *r) +{ + prepare_packed_git(r); + + if (!r->objects->all_packs) { + struct packed_git *p = r->objects->packed_git; + struct multi_pack_index *m; + + for (m = r->objects->multi_pack_index; m; m = m->next) { + uint32_t i; + for (i = 0; i < m->num_packs; i++) { + if (!prepare_midx_pack(m, i)) { + m->packs[i]->next = p; + p = m->packs[i]; + } + } + } + + r->objects->all_packs = p; + } + + return r->objects->all_packs; } -void reprepare_packed_git(void) +struct list_head *get_packed_git_mru(struct repository *r) { - approximate_object_count_valid = 0; - prepare_packed_git_run_once = 0; - prepare_packed_git(); + prepare_packed_git(r); + return &r->objects->packed_git_mru; } unsigned long unpack_object_header_buffer(const unsigned char *buf, @@ -987,13 +1127,14 @@ int unpack_object_header(struct packed_git *p, void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1) { unsigned i; + const unsigned hashsz = the_hash_algo->rawsz; for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + GIT_SHA1_RAWSZ * i)) + if (hasheq(sha1, p->bad_object_sha1 + hashsz * i)) return; p->bad_object_sha1 = xrealloc(p->bad_object_sha1, st_mult(GIT_MAX_RAWSZ, st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + GIT_SHA1_RAWSZ * p->num_bad_objects, sha1); + hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1); p->num_bad_objects++; } @@ -1002,9 +1143,10 @@ const struct packed_git *has_packed_and_bad(const unsigned char *sha1) struct packed_git *p; unsigned i; - for (p = packed_git; p; p = p->next) + for (p = the_repository->objects->packed_git; p; p = p->next) for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) + if (hasheq(sha1, + p->bad_object_sha1 + the_hash_algo->rawsz * i)) return p; return NULL; } @@ -1042,7 +1184,7 @@ static off_t get_delta_base(struct packed_git *p, } else if (type == OBJ_REF_DELTA) { /* The base entry _must_ be in the same pack */ base_offset = find_pack_entry_one(base_info, p); - *curpos += 20; + *curpos += the_hash_algo->rawsz; } else die("I am totally screwed"); return base_offset; @@ -1080,17 +1222,19 @@ static const unsigned char *get_delta_base_sha1(struct packed_git *p, return NULL; } -static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset) +static int retry_bad_packed_offset(struct repository *r, + struct packed_git *p, + off_t obj_offset) { int type; struct revindex_entry *revidx; - const unsigned char *sha1; + struct object_id oid; revidx = find_pack_revindex(p, obj_offset); if (!revidx) return OBJ_BAD; - sha1 = nth_packed_object_sha1(p, revidx->nr); - mark_bad_packed_object(p, sha1); - type = sha1_object_info(sha1, NULL); + nth_packed_object_oid(&oid, p, revidx->nr); + mark_bad_packed_object(p, oid.hash); + type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) return OBJ_BAD; return type; @@ -1098,7 +1242,8 @@ static int retry_bad_packed_offset(struct packed_git *p, off_t obj_offset) #define POI_STACK_PREALLOC 64 -static enum object_type packed_to_object_type(struct packed_git *p, +static enum object_type packed_to_object_type(struct repository *r, + struct packed_git *p, off_t obj_offset, enum object_type type, struct pack_window **w_curs, @@ -1129,7 +1274,7 @@ static enum object_type packed_to_object_type(struct packed_git *p, if (type <= OBJ_NONE) { /* If getting the base itself fails, we first * retry the base, otherwise unwind */ - type = retry_bad_packed_offset(p, base_offset); + type = retry_bad_packed_offset(r, p, base_offset); if (type > OBJ_NONE) goto out; goto unwind; @@ -1157,7 +1302,7 @@ out: unwind: while (poi_stack_nr) { obj_offset = poi_stack[--poi_stack_nr]; - type = retry_bad_packed_offset(p, obj_offset); + type = retry_bad_packed_offset(r, p, obj_offset); if (type > OBJ_NONE) goto out; } @@ -1244,14 +1389,15 @@ static void detach_delta_base_cache_entry(struct delta_base_cache_entry *ent) free(ent); } -static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset, - unsigned long *base_size, enum object_type *type) +static void *cache_or_unpack_entry(struct repository *r, struct packed_git *p, + off_t base_offset, unsigned long *base_size, + enum object_type *type) { struct delta_base_cache_entry *ent; ent = get_delta_base_cache_entry(p, base_offset); if (!ent) - return unpack_entry(p, base_offset, type, base_size); + return unpack_entry(r, p, base_offset, type, base_size); if (type) *type = ent->type; @@ -1305,8 +1451,8 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, hashmap_add(&delta_base_cache, ent); } -int packed_object_info(struct packed_git *p, off_t obj_offset, - struct object_info *oi) +int packed_object_info(struct repository *r, struct packed_git *p, + off_t obj_offset, struct object_info *oi) { struct pack_window *w_curs = NULL; unsigned long size; @@ -1318,7 +1464,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset, * a "real" type later if the caller is interested. */ if (oi->contentp) { - *oi->contentp = cache_or_unpack_entry(p, obj_offset, oi->sizep, + *oi->contentp = cache_or_unpack_entry(r, p, obj_offset, oi->sizep, &type); if (!*oi->contentp) type = OBJ_BAD; @@ -1350,16 +1496,16 @@ int packed_object_info(struct packed_git *p, off_t obj_offset, *oi->disk_sizep = revidx[1].offset - obj_offset; } - if (oi->typep || oi->typename) { + if (oi->typep || oi->type_name) { enum object_type ptot; - ptot = packed_to_object_type(p, obj_offset, type, &w_curs, - curpos); + ptot = packed_to_object_type(r, p, obj_offset, + type, &w_curs, curpos); if (oi->typep) *oi->typep = ptot; - if (oi->typename) { - const char *tn = typename(ptot); + if (oi->type_name) { + const char *tn = type_name(ptot); if (tn) - strbuf_addstr(oi->typename, tn); + strbuf_addstr(oi->type_name, tn); } if (ptot < 0) { type = OBJ_BAD; @@ -1422,6 +1568,9 @@ static void *unpack_compressed_entry(struct packed_git *p, return NULL; } + /* versions of zlib can clobber unconsumed portion of outbuf */ + buffer[size] = '\0'; + return buffer; } @@ -1441,7 +1590,9 @@ struct unpack_entry_stack_ent { unsigned long size; }; -static void *read_object(const unsigned char *sha1, enum object_type *type, +static void *read_object(struct repository *r, + const struct object_id *oid, + enum object_type *type, unsigned long *size) { struct object_info oi = OBJECT_INFO_INIT; @@ -1450,12 +1601,12 @@ static void *read_object(const unsigned char *sha1, enum object_type *type, oi.sizep = size; oi.contentp = &content; - if (sha1_object_info_extended(sha1, &oi, 0) < 0) + if (oid_object_info_extended(r, oid, &oi, 0) < 0) return NULL; return content; } -void *unpack_entry(struct packed_git *p, off_t obj_offset, +void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, enum object_type *final_type, unsigned long *final_size) { struct pack_window *w_curs = NULL; @@ -1490,11 +1641,11 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); off_t len = revidx[1].offset - obj_offset; if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { - const unsigned char *sha1 = - nth_packed_object_sha1(p, revidx->nr); + struct object_id oid; + nth_packed_object_oid(&oid, p, revidx->nr); error("bad packed object CRC for %s", - sha1_to_hex(sha1)); - mark_bad_packed_object(p, sha1); + oid_to_hex(&oid)); + mark_bad_packed_object(p, oid.hash); data = NULL; goto out; } @@ -1537,7 +1688,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, case OBJ_OFS_DELTA: case OBJ_REF_DELTA: if (data) - die("BUG: unpack_entry: left loop at a valid delta"); + BUG("unpack_entry: left loop at a valid delta"); break; case OBJ_COMMIT: case OBJ_TREE: @@ -1577,16 +1728,16 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, * of a corrupted pack, and is better than failing outright. */ struct revindex_entry *revidx; - const unsigned char *base_sha1; + struct object_id base_oid; revidx = find_pack_revindex(p, obj_offset); if (revidx) { - base_sha1 = nth_packed_object_sha1(p, revidx->nr); + nth_packed_object_oid(&base_oid, p, revidx->nr); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", - sha1_to_hex(base_sha1), (uintmax_t)obj_offset, + oid_to_hex(&base_oid), (uintmax_t)obj_offset, p->pack_name); - mark_bad_packed_object(p, base_sha1); - base = read_object(base_sha1, &type, &base_size); + mark_bad_packed_object(p, base_oid.hash); + base = read_object(r, &base_oid, &type, &base_size); external_base = base; } } @@ -1643,10 +1794,35 @@ out: return data; } +int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32_t *result) +{ + const unsigned char *index_fanout = p->index_data; + const unsigned char *index_lookup; + const unsigned int hashsz = the_hash_algo->rawsz; + int index_lookup_width; + + if (!index_fanout) + BUG("bsearch_pack called without a valid pack-index"); + + index_lookup = index_fanout + 4 * 256; + if (p->index_version == 1) { + index_lookup_width = hashsz + 4; + index_lookup += 4; + } else { + index_lookup_width = hashsz; + index_fanout += 8; + index_lookup += 8; + } + + return bsearch_hash(oid->hash, (const uint32_t*)index_fanout, + index_lookup, index_lookup_width, result); +} + const unsigned char *nth_packed_object_sha1(struct packed_git *p, uint32_t n) { const unsigned char *index = p->index_data; + const unsigned int hashsz = the_hash_algo->rawsz; if (!index) { if (open_pack_index(p)) return NULL; @@ -1656,10 +1832,10 @@ const unsigned char *nth_packed_object_sha1(struct packed_git *p, return NULL; index += 4 * 256; if (p->index_version == 1) { - return index + 24 * n + 4; + return index + (hashsz + 4) * n + 4; } else { index += 8; - return index + 20 * n; + return index + hashsz * n; } } @@ -1691,71 +1867,37 @@ void check_pack_index_ptr(const struct packed_git *p, const void *vptr) off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) { const unsigned char *index = p->index_data; + const unsigned int hashsz = the_hash_algo->rawsz; index += 4 * 256; if (p->index_version == 1) { - return ntohl(*((uint32_t *)(index + 24 * n))); + return ntohl(*((uint32_t *)(index + (hashsz + 4) * n))); } else { uint32_t off; - index += 8 + p->num_objects * (20 + 4); + index += 8 + p->num_objects * (hashsz + 4); off = ntohl(*((uint32_t *)(index + 4 * n))); if (!(off & 0x80000000)) return off; index += p->num_objects * 4 + (off & 0x7fffffff) * 8; check_pack_index_ptr(p, index); - return (((uint64_t)ntohl(*((uint32_t *)(index + 0)))) << 32) | - ntohl(*((uint32_t *)(index + 4))); + return get_be64(index); } } off_t find_pack_entry_one(const unsigned char *sha1, struct packed_git *p) { - const uint32_t *level1_ofs = p->index_data; const unsigned char *index = p->index_data; - unsigned hi, lo, stride; - static int debug_lookup = -1; - - if (debug_lookup < 0) - debug_lookup = !!getenv("GIT_DEBUG_LOOKUP"); + struct object_id oid; + uint32_t result; if (!index) { if (open_pack_index(p)) return 0; - level1_ofs = p->index_data; - index = p->index_data; - } - if (p->index_version > 1) { - level1_ofs += 2; - index += 8; - } - index += 4 * 256; - hi = ntohl(level1_ofs[*sha1]); - lo = ((*sha1 == 0x0) ? 0 : ntohl(level1_ofs[*sha1 - 1])); - if (p->index_version > 1) { - stride = 20; - } else { - stride = 24; - index += 4; } - if (debug_lookup) - printf("%02x%02x%02x... lo %u hi %u nr %"PRIu32"\n", - sha1[0], sha1[1], sha1[2], lo, hi, p->num_objects); - - while (lo < hi) { - unsigned mi = lo + (hi - lo) / 2; - int cmp = hashcmp(index + mi * stride, sha1); - - if (debug_lookup) - printf("lo %u hi %u rg %u mi %u\n", - lo, hi, hi - lo, mi); - if (!cmp) - return nth_packed_object_offset(p, mi); - if (cmp > 0) - hi = mi; - else - lo = mi+1; - } + hashcpy(oid.hash, sha1); + if (bsearch_pack(&oid, p, &result)) + return nth_packed_object_offset(p, result); return 0; } @@ -1793,7 +1935,7 @@ struct packed_git *find_sha1_pack(const unsigned char *sha1, } -static int fill_pack_entry(const unsigned char *sha1, +static int fill_pack_entry(const struct object_id *oid, struct pack_entry *e, struct packed_git *p) { @@ -1802,11 +1944,12 @@ static int fill_pack_entry(const unsigned char *sha1, if (p->num_bad_objects) { unsigned i; for (i = 0; i < p->num_bad_objects; i++) - if (!hashcmp(sha1, p->bad_object_sha1 + 20 * i)) + if (hasheq(oid->hash, + p->bad_object_sha1 + the_hash_algo->rawsz * i)) return 0; } - offset = find_pack_entry_one(sha1, p); + offset = find_pack_entry_one(oid->hash, p); if (!offset) return 0; @@ -1821,35 +1964,37 @@ static int fill_pack_entry(const unsigned char *sha1, return 0; e->offset = offset; e->p = p; - hashcpy(e->sha1, sha1); return 1; } -/* - * Iff a pack file contains the object named by sha1, return true and - * store its location to e. - */ -int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) +int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e) { - struct mru_entry *p; + struct list_head *pos; + struct multi_pack_index *m; - prepare_packed_git(); - if (!packed_git) + prepare_packed_git(r); + if (!r->objects->packed_git && !r->objects->multi_pack_index) return 0; - for (p = packed_git_mru.head; p; p = p->next) { - if (fill_pack_entry(sha1, e, p->item)) { - mru_mark(&packed_git_mru, p); + for (m = r->objects->multi_pack_index; m; m = m->next) { + if (fill_midx_entry(oid, e, m)) + return 1; + } + + list_for_each(pos, &r->objects->packed_git_mru) { + struct packed_git *p = list_entry(pos, struct packed_git, mru); + if (fill_pack_entry(oid, e, p)) { + list_move(&p->mru, &r->objects->packed_git_mru); return 1; } } return 0; } -int has_sha1_pack(const unsigned char *sha1) +int has_object_pack(const struct object_id *oid) { struct pack_entry e; - return find_pack_entry(sha1, &e); + return find_pack_entry(the_repository, oid, &e); } int has_pack_index(const unsigned char *sha1) @@ -1860,42 +2005,115 @@ int has_pack_index(const unsigned char *sha1) return 1; } -static int for_each_object_in_pack(struct packed_git *p, each_packed_object_fn cb, void *data) +int for_each_object_in_pack(struct packed_git *p, + each_packed_object_fn cb, void *data, + enum for_each_object_flags flags) { uint32_t i; int r = 0; + if (flags & FOR_EACH_OBJECT_PACK_ORDER) + load_pack_revindex(p); + for (i = 0; i < p->num_objects; i++) { + uint32_t pos; struct object_id oid; - if (!nth_packed_object_oid(&oid, p, i)) + if (flags & FOR_EACH_OBJECT_PACK_ORDER) + pos = p->revindex[i].nr; + else + pos = i; + + if (!nth_packed_object_oid(&oid, p, pos)) return error("unable to get sha1 of object %u in %s", - i, p->pack_name); + pos, p->pack_name); - r = cb(&oid, p, i, data); + r = cb(&oid, p, pos, data); if (r) break; } return r; } -int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags) +int for_each_packed_object(each_packed_object_fn cb, void *data, + enum for_each_object_flags flags) { struct packed_git *p; int r = 0; int pack_errors = 0; - prepare_packed_git(); - for (p = packed_git; p; p = p->next) { + prepare_packed_git(the_repository); + for (p = get_all_packs(the_repository); p; p = p->next) { if ((flags & FOR_EACH_OBJECT_LOCAL_ONLY) && !p->pack_local) continue; + if ((flags & FOR_EACH_OBJECT_PROMISOR_ONLY) && + !p->pack_promisor) + continue; if (open_pack_index(p)) { pack_errors = 1; continue; } - r = for_each_object_in_pack(p, cb, data); + r = for_each_object_in_pack(p, cb, data, flags); if (r) break; } return r ? r : pack_errors; } + +static int add_promisor_object(const struct object_id *oid, + struct packed_git *pack, + uint32_t pos, + void *set_) +{ + struct oidset *set = set_; + struct object *obj = parse_object(the_repository, oid); + if (!obj) + return 1; + + oidset_insert(set, oid); + + /* + * If this is a tree, commit, or tag, the objects it refers + * to are also promisor objects. (Blobs refer to no objects->) + */ + if (obj->type == OBJ_TREE) { + struct tree *tree = (struct tree *)obj; + struct tree_desc desc; + struct name_entry entry; + if (init_tree_desc_gently(&desc, tree->buffer, tree->size)) + /* + * Error messages are given when packs are + * verified, so do not print any here. + */ + return 0; + while (tree_entry_gently(&desc, &entry)) + oidset_insert(set, entry.oid); + } else if (obj->type == OBJ_COMMIT) { + struct commit *commit = (struct commit *) obj; + struct commit_list *parents = commit->parents; + + oidset_insert(set, get_commit_tree_oid(commit)); + for (; parents; parents = parents->next) + oidset_insert(set, &parents->item->object.oid); + } else if (obj->type == OBJ_TAG) { + struct tag *tag = (struct tag *) obj; + oidset_insert(set, &tag->tagged->oid); + } + return 0; +} + +int is_promisor_object(const struct object_id *oid) +{ + static struct oidset promisor_objects; + static int promisor_objects_prepared; + + if (!promisor_objects_prepared) { + if (repository_format_partial_clone) { + for_each_packed_object(add_promisor_object, + &promisor_objects, + FOR_EACH_OBJECT_PROMISOR_ONLY); + } + promisor_objects_prepared = 1; + } + return oidset_contains(&promisor_objects, oid); +} |