diff options
Diffstat (limited to 'packfile.c')
-rw-r--r-- | packfile.c | 263 |
1 files changed, 189 insertions, 74 deletions
diff --git a/packfile.c b/packfile.c index f4e752996d..755aa7aec5 100644 --- a/packfile.c +++ b/packfile.c @@ -7,7 +7,7 @@ #include "packfile.h" #include "delta.h" #include "streaming.h" -#include "sha1-lookup.h" +#include "hash-lookup.h" #include "commit.h" #include "object.h" #include "tag.h" @@ -148,7 +148,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, * - hash of the packfile * - file checksum */ - if (idx_size != 4 * 256 + nr * (hashsz + 4) + hashsz + hashsz) + if (idx_size != st_add(4 * 256 + hashsz + hashsz, st_mult(nr, hashsz + 4))) return error("wrong index v1 file size in %s", path); } else if (version == 2) { /* @@ -164,10 +164,10 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, * variable sized table containing 8-byte entries * for offsets larger than 2^31. */ - unsigned long min_size = 8 + 4*256 + nr*(hashsz + 4 + 4) + hashsz + hashsz; - unsigned long max_size = min_size; + size_t min_size = st_add(8 + 4*256 + hashsz + hashsz, st_mult(nr, hashsz + 4 + 4)); + size_t max_size = min_size; if (nr) - max_size += (nr - 1)*8; + max_size = st_add(max_size, st_mult(nr - 1, 8)); if (idx_size < min_size || idx_size > max_size) return error("wrong index v2 file size in %s", path); if (idx_size != min_size && @@ -178,6 +178,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, */ (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); + p->crc_offset = 8 + 4 * 256 + nr * hashsz; } p->index_version = version; @@ -323,11 +324,21 @@ void close_pack_index(struct packed_git *p) } } +void close_pack_revindex(struct packed_git *p) { + if (!p->revindex_map) + return; + + munmap((void *)p->revindex_map, p->revindex_size); + p->revindex_map = NULL; + p->revindex_data = NULL; +} + void close_pack(struct packed_git *p) { close_pack_windows(p); close_pack_fd(p); close_pack_index(p); + close_pack_revindex(p); } void close_object_store(struct raw_object_store *o) @@ -350,7 +361,7 @@ void close_object_store(struct raw_object_store *o) void unlink_pack_path(const char *pack_name, int force_delete) { - static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"}; + static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"}; int i; struct strbuf buf = STRBUF_INIT; size_t plen; @@ -513,19 +524,8 @@ static int open_packed_git_1(struct packed_git *p) ssize_t read_result; const unsigned hashsz = the_hash_algo->rawsz; - if (!p->index_data) { - struct multi_pack_index *m; - const char *pack_name = pack_basename(p); - - for (m = the_repository->objects->multi_pack_index; - m; m = m->next) { - if (midx_contains_pack(m, pack_name)) - break; - } - - if (!m && open_pack_index(p)) - return error("packfile %s index unavailable", p->pack_name); - } + if (open_pack_index(p)) + return error("packfile %s index unavailable", p->pack_name); if (!pack_max_fds) { unsigned int max_fds = get_max_fd_limit(); @@ -566,10 +566,6 @@ static int open_packed_git_1(struct packed_git *p) " supported (try upgrading GIT to a newer version)", p->pack_name, ntohl(hdr.hdr_version)); - /* Skip index checking if in multi-pack-index */ - if (!p->index_data) - return 0; - /* Verify the pack matches its index. */ if (p->num_objects != ntohl(hdr.hdr_entries)) return error("packfile %s claims to have %"PRIu32" objects" @@ -642,7 +638,7 @@ unsigned char *use_pack(struct packed_git *p, if (p->pack_fd == -1 && open_packed_git(p)) die("packfile %s cannot be accessed", p->pack_name); - win = xcalloc(1, sizeof(*win)); + CALLOC_ARRAY(win, 1); win->offset = (offset / window_align) * window_align; len = p->pack_size - win->offset; if (len > packed_git_window_size) @@ -817,10 +813,7 @@ void for_each_file_in_pack_dir(const char *objdir, } strbuf_addch(&path, '/'); dirnamelen = path.len; - while ((de = readdir(dir)) != NULL) { - if (is_dot_or_dotdot(de->d_name)) - continue; - + while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) { strbuf_setlen(&path, dirnamelen); strbuf_addstr(&path, de->d_name); @@ -866,7 +859,11 @@ static void prepare_pack(const char *full_name, size_t full_name_len, if (!strcmp(file_name, "multi-pack-index")) return; + if (starts_with(file_name, "multi-pack-index") && + ends_with(file_name, ".rev")) + return; if (ends_with(file_name, ".idx") || + ends_with(file_name, ".rev") || ends_with(file_name, ".pack") || ends_with(file_name, ".bitmap") || ends_with(file_name, ".keep") || @@ -922,6 +919,7 @@ unsigned long repo_approximate_object_count(struct repository *r) count += p->num_objects; } r->objects->approximate_object_count = count; + r->objects->approximate_object_count_valid = 1; } return r->objects->approximate_object_count; } @@ -1026,6 +1024,17 @@ struct multi_pack_index *get_multi_pack_index(struct repository *r) return r->objects->multi_pack_index; } +struct multi_pack_index *get_local_multi_pack_index(struct repository *r) +{ + struct multi_pack_index *m = get_multi_pack_index(r); + + /* no need to iterate; we always put the local one first (if any) */ + if (m && m->local) + return m; + + return NULL; +} + struct packed_git *get_all_packs(struct repository *r) { struct multi_pack_index *m; @@ -1237,18 +1246,18 @@ static int get_delta_base_oid(struct packed_git *p, oidread(oid, base); return 0; } else if (type == OBJ_OFS_DELTA) { - struct revindex_entry *revidx; + uint32_t base_pos; off_t base_offset = get_delta_base(p, w_curs, &curpos, type, delta_obj_offset); if (!base_offset) return -1; - revidx = find_pack_revindex(p, base_offset); - if (!revidx) + if (offset_to_pack_pos(p, base_offset, &base_pos) < 0) return -1; - return nth_packed_object_id(oid, p, revidx->nr); + return nth_packed_object_id(oid, p, + pack_pos_to_index(p, base_pos)); } else return -1; } @@ -1258,12 +1267,11 @@ static int retry_bad_packed_offset(struct repository *r, off_t obj_offset) { int type; - struct revindex_entry *revidx; + uint32_t pos; struct object_id oid; - revidx = find_pack_revindex(p, obj_offset); - if (!revidx) + if (offset_to_pack_pos(p, obj_offset, &pos) < 0) return OBJ_BAD; - nth_packed_object_id(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos)); mark_bad_packed_object(p, oid.hash); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) @@ -1462,7 +1470,7 @@ void clear_delta_base_cache(void) static void add_delta_base_cache(struct packed_git *p, off_t base_offset, void *base, unsigned long base_size, enum object_type type) { - struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent)); + struct delta_base_cache_entry *ent; struct list_head *lru, *tmp; /* @@ -1470,8 +1478,10 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, * is unpacking the same object, in unpack_entry() (since its phases I * and III might run concurrently across multiple threads). */ - if (in_delta_base_cache(p, base_offset)) + if (in_delta_base_cache(p, base_offset)) { + free(base); return; + } delta_base_cached += base_size; @@ -1483,6 +1493,7 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, release_delta_base_cache(f); } + ent = xmalloc(sizeof(*ent)); ent->key.p = p; ent->key.base_offset = base_offset; ent->type = type; @@ -1537,8 +1548,15 @@ int packed_object_info(struct repository *r, struct packed_git *p, } if (oi->disk_sizep) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - *oi->disk_sizep = revidx[1].offset - obj_offset; + uint32_t pos; + if (offset_to_pack_pos(p, obj_offset, &pos) < 0) { + error("could not find object at offset %"PRIuMAX" " + "in pack %s", (uintmax_t)obj_offset, p->pack_name); + type = OBJ_BAD; + goto out; + } + + *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset; } if (oi->typep || oi->type_name) { @@ -1687,11 +1705,21 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, } if (do_check_packed_object_crc && p->index_version > 1) { - struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); - off_t len = revidx[1].offset - obj_offset; - if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { + uint32_t pack_pos, index_pos; + off_t len; + + if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) { + error("could not find object at offset %"PRIuMAX" in pack %s", + (uintmax_t)obj_offset, p->pack_name); + data = NULL; + goto out; + } + + len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset; + index_pos = pack_pos_to_index(p, pack_pos); + if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) { struct object_id oid; - nth_packed_object_id(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, index_pos); error("bad packed object CRC for %s", oid_to_hex(&oid)); mark_bad_packed_object(p, oid.hash); @@ -1763,12 +1791,10 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, void *external_base = NULL; unsigned long delta_size, base_size = size; int i; + off_t base_obj_offset = obj_offset; data = NULL; - if (base) - add_delta_base_cache(p, obj_offset, base, base_size, type); - if (!base) { /* * We're probably in deep shit, but let's try to fetch @@ -1776,11 +1802,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, * This is costly but should happen only in the presence * of a corrupted pack, and is better than failing outright. */ - struct revindex_entry *revidx; + uint32_t pos; struct object_id base_oid; - revidx = find_pack_revindex(p, obj_offset); - if (revidx) { - nth_packed_object_id(&base_oid, p, revidx->nr); + if (!(offset_to_pack_pos(p, obj_offset, &pos))) { + nth_packed_object_id(&base_oid, p, + pack_pos_to_index(p, pos)); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, @@ -1806,24 +1832,33 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, "at offset %"PRIuMAX" from %s", (uintmax_t)curpos, p->pack_name); data = NULL; - free(external_base); - continue; - } + } else { + data = patch_delta(base, base_size, delta_data, + delta_size, &size); - data = patch_delta(base, base_size, - delta_data, delta_size, - &size); + /* + * We could not apply the delta; warn the user, but + * keep going. Our failure will be noticed either in + * the next iteration of the loop, or if this is the + * final delta, in the caller when we return NULL. + * Those code paths will take care of making a more + * explicit warning and retrying with another copy of + * the object. + */ + if (!data) + error("failed to apply delta"); + } /* - * We could not apply the delta; warn the user, but keep going. - * Our failure will be noticed either in the next iteration of - * the loop, or if this is the final delta, in the caller when - * we return NULL. Those code paths will take care of making - * a more explicit warning and retrying with another copy of - * the object. + * We delay adding `base` to the cache until the end of the loop + * because unpack_compressed_entry() momentarily releases the + * obj_read_mutex, giving another thread the chance to access + * the cache. Therefore, if `base` was already there, this other + * thread could free() it (e.g. to make space for another entry) + * before we are done using it. */ - if (!data) - error("failed to apply delta"); + if (!external_base) + add_delta_base_cache(p, base_obj_offset, base, base_size, type); free(delta_data); free(external_base); @@ -1910,14 +1945,14 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n) const unsigned int hashsz = the_hash_algo->rawsz; index += 4 * 256; if (p->index_version == 1) { - return ntohl(*((uint32_t *)(index + (hashsz + 4) * n))); + return ntohl(*((uint32_t *)(index + (hashsz + 4) * (size_t)n))); } else { uint32_t off; - index += 8 + p->num_objects * (hashsz + 4); + index += 8 + (size_t)p->num_objects * (hashsz + 4); off = ntohl(*((uint32_t *)(index + 4 * n))); if (!(off & 0x80000000)) return off; - index += p->num_objects * 4 + (off & 0x7fffffff) * 8; + index += (size_t)p->num_objects * 4 + (off & 0x7fffffff) * 8; check_pack_index_ptr(p, index); return get_be64(index); } @@ -2031,12 +2066,79 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa return 0; } +static void maybe_invalidate_kept_pack_cache(struct repository *r, + unsigned flags) +{ + if (!r->objects->kept_pack_cache.packs) + return; + if (r->objects->kept_pack_cache.flags == flags) + return; + FREE_AND_NULL(r->objects->kept_pack_cache.packs); + r->objects->kept_pack_cache.flags = 0; +} + +static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags) +{ + maybe_invalidate_kept_pack_cache(r, flags); + + if (!r->objects->kept_pack_cache.packs) { + struct packed_git **packs = NULL; + size_t nr = 0, alloc = 0; + struct packed_git *p; + + /* + * We want "all" packs here, because we need to cover ones that + * are used by a midx, as well. We need to look in every one of + * them (instead of the midx itself) to cover duplicates. It's + * possible that an object is found in two packs that the midx + * covers, one kept and one not kept, but the midx returns only + * the non-kept version. + */ + for (p = get_all_packs(r); p; p = p->next) { + if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) || + (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) { + ALLOC_GROW(packs, nr + 1, alloc); + packs[nr++] = p; + } + } + ALLOC_GROW(packs, nr + 1, alloc); + packs[nr] = NULL; + + r->objects->kept_pack_cache.packs = packs; + r->objects->kept_pack_cache.flags = flags; + } + + return r->objects->kept_pack_cache.packs; +} + +int find_kept_pack_entry(struct repository *r, + const struct object_id *oid, + unsigned flags, + struct pack_entry *e) +{ + struct packed_git **cache; + + for (cache = kept_pack_cache(r, flags); *cache; cache++) { + struct packed_git *p = *cache; + if (fill_pack_entry(oid, e, p)) + return 1; + } + + return 0; +} + int has_object_pack(const struct object_id *oid) { struct pack_entry e; return find_pack_entry(the_repository, oid, &e); } +int has_object_kept_pack(const struct object_id *oid, unsigned flags) +{ + struct pack_entry e; + return find_kept_pack_entry(the_repository, oid, flags, &e); +} + int has_pack_index(const unsigned char *sha1) { struct stat st; @@ -2058,19 +2160,31 @@ int for_each_object_in_pack(struct packed_git *p, } for (i = 0; i < p->num_objects; i++) { - uint32_t pos; + uint32_t index_pos; struct object_id oid; + /* + * We are iterating "i" from 0 up to num_objects, but its + * meaning may be different, depending on the requested output + * order: + * + * - in object-name order, it is the same as the index order + * used by nth_packed_object_id(), so we can pass it + * directly + * + * - in pack-order, it is pack position, which we must + * convert to an index position in order to get the oid. + */ if (flags & FOR_EACH_OBJECT_PACK_ORDER) - pos = p->revindex[i].nr; + index_pos = pack_pos_to_index(p, i); else - pos = i; + index_pos = i; - if (nth_packed_object_id(&oid, p, pos) < 0) + if (nth_packed_object_id(&oid, p, index_pos) < 0) return error("unable to get sha1 of object %u in %s", - pos, p->pack_name); + index_pos, p->pack_name); - r = cb(&oid, p, pos, data); + r = cb(&oid, p, index_pos, data); if (r) break; } @@ -2130,6 +2244,7 @@ static int add_promisor_object(const struct object_id *oid, return 0; while (tree_entry_gently(&desc, &entry)) oidset_insert(set, &entry.oid); + free_tree_buffer(tree); } else if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; struct commit_list *parents = commit->parents; |