diff options
Diffstat (limited to 'packfile.c')
-rw-r--r-- | packfile.c | 130 |
1 files changed, 71 insertions, 59 deletions
diff --git a/packfile.c b/packfile.c index f0dc63e92f..6ab5233613 100644 --- a/packfile.c +++ b/packfile.c @@ -178,6 +178,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map, */ (sizeof(off_t) <= 4)) return error("pack too large for current definition of off_t in %s", path); + p->crc_offset = 8 + 4 * 256 + nr * hashsz; } p->index_version = version; @@ -510,7 +511,6 @@ static int open_packed_git_1(struct packed_git *p) struct pack_header hdr; unsigned char hash[GIT_MAX_RAWSZ]; unsigned char *idx_hash; - long fd_flag; ssize_t read_result; const unsigned hashsz = the_hash_algo->rawsz; @@ -554,16 +554,6 @@ static int open_packed_git_1(struct packed_git *p) } else if (p->pack_size != st.st_size) return error("packfile %s size changed", p->pack_name); - /* We leave these file descriptors open with sliding mmap; - * there is no point keeping them open across exec(), though. - */ - fd_flag = fcntl(p->pack_fd, F_GETFD, 0); - if (fd_flag < 0) - return error("cannot determine file descriptor flags"); - fd_flag |= FD_CLOEXEC; - if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1) - return error("cannot set FD_CLOEXEC"); - /* Verify we recognize this pack file format. */ read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr)); if (read_result < 0) @@ -587,9 +577,8 @@ static int open_packed_git_1(struct packed_git *p) " while index indicates %"PRIu32" objects", p->pack_name, ntohl(hdr.hdr_entries), p->num_objects); - if (lseek(p->pack_fd, p->pack_size - hashsz, SEEK_SET) == -1) - return error("end of packfile %s is unavailable", p->pack_name); - read_result = read_in_full(p->pack_fd, hash, hashsz); + read_result = pread_in_full(p->pack_fd, hash, hashsz, + p->pack_size - hashsz); if (read_result < 0) return error_errno("error reading from %s", p->pack_name); if (read_result != hashsz) @@ -1016,12 +1005,14 @@ void reprepare_packed_git(struct repository *r) { struct object_directory *odb; + obj_read_lock(); for (odb = r->objects->odb; odb; odb = odb->next) odb_clear_loose_cache(odb); r->objects->approximate_object_count_valid = 0; r->objects->packed_git_initialized = 0; prepare_packed_git(r); + obj_read_unlock(); } struct packed_git *get_packed_git(struct repository *r) @@ -1098,7 +1089,23 @@ unsigned long get_size_from_delta(struct packed_git *p, do { in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; + /* + * Note: the window section returned by use_pack() must be + * available throughout git_inflate()'s unlocked execution. To + * ensure no other thread will modify the window in the + * meantime, we rely on the packed_window.inuse_cnt. This + * counter is incremented before window reading and checked + * before window disposal. + * + * Other worrying sections could be the call to close_pack_fd(), + * which can close packs even with in-use windows, and to + * reprepare_packed_git(). Regarding the former, mmap doc says: + * "closing the file descriptor does not unmap the region". And + * for the latter, it won't re-open already available packs. + */ + obj_read_unlock(); st = git_inflate(&stream, Z_FINISH); + obj_read_lock(); curpos += stream.next_in - in; } while ((st == Z_OK || st == Z_BUF_ERROR) && stream.total_out < sizeof(delta_head)); @@ -1174,11 +1181,11 @@ const struct packed_git *has_packed_and_bad(struct repository *r, return NULL; } -static off_t get_delta_base(struct packed_git *p, - struct pack_window **w_curs, - off_t *curpos, - enum object_type type, - off_t delta_obj_offset) +off_t get_delta_base(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + enum object_type type, + off_t delta_obj_offset) { unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL); off_t base_offset; @@ -1219,30 +1226,32 @@ static off_t get_delta_base(struct packed_git *p, * the final object lookup), but more expensive for OFS deltas (we * have to load the revidx to convert the offset back into a sha1). */ -static const unsigned char *get_delta_base_sha1(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos, - enum object_type type, - off_t delta_obj_offset) +static int get_delta_base_oid(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos, + struct object_id *oid, + enum object_type type, + off_t delta_obj_offset) { if (type == OBJ_REF_DELTA) { unsigned char *base = use_pack(p, w_curs, curpos, NULL); - return base; + oidread(oid, base); + return 0; } else if (type == OBJ_OFS_DELTA) { struct revindex_entry *revidx; off_t base_offset = get_delta_base(p, w_curs, &curpos, type, delta_obj_offset); if (!base_offset) - return NULL; + return -1; revidx = find_pack_revindex(p, base_offset); if (!revidx) - return NULL; + return -1; - return nth_packed_object_sha1(p, revidx->nr); + return nth_packed_object_id(oid, p, revidx->nr); } else - return NULL; + return -1; } static int retry_bad_packed_offset(struct repository *r, @@ -1255,7 +1264,7 @@ static int retry_bad_packed_offset(struct repository *r, revidx = find_pack_revindex(p, obj_offset); if (!revidx) return OBJ_BAD; - nth_packed_object_oid(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, revidx->nr); mark_bad_packed_object(p, oid.hash); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) @@ -1457,6 +1466,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset, struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent)); struct list_head *lru, *tmp; + /* + * Check required to avoid redundant entries when more than one thread + * is unpacking the same object, in unpack_entry() (since its phases I + * and III might run concurrently across multiple threads). + */ + if (in_delta_base_cache(p, base_offset)) + return; + delta_base_cached += base_size; list_for_each_safe(lru, tmp, &delta_base_cache_lru) { @@ -1542,20 +1559,16 @@ int packed_object_info(struct repository *r, struct packed_git *p, } } - if (oi->delta_base_sha1) { + if (oi->delta_base_oid) { if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) { - const unsigned char *base; - - base = get_delta_base_sha1(p, &w_curs, curpos, - type, obj_offset); - if (!base) { + if (get_delta_base_oid(p, &w_curs, curpos, + oi->delta_base_oid, + type, obj_offset) < 0) { type = OBJ_BAD; goto out; } - - hashcpy(oi->delta_base_sha1, base); } else - hashclr(oi->delta_base_sha1); + oidclr(oi->delta_base_oid); } oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED : @@ -1586,7 +1599,15 @@ static void *unpack_compressed_entry(struct packed_git *p, do { in = use_pack(p, w_curs, curpos, &stream.avail_in); stream.next_in = in; + /* + * Note: we must ensure the window section returned by + * use_pack() will be available throughout git_inflate()'s + * unlocked execution. Please refer to the comment at + * get_size_from_delta() to see how this is done. + */ + obj_read_unlock(); st = git_inflate(&stream, Z_FINISH); + obj_read_lock(); if (!stream.avail_out) break; /* the payload is larger than it should be */ curpos += stream.next_in - in; @@ -1671,7 +1692,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, off_t len = revidx[1].offset - obj_offset; if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { struct object_id oid; - nth_packed_object_oid(&oid, p, revidx->nr); + nth_packed_object_id(&oid, p, revidx->nr); error("bad packed object CRC for %s", oid_to_hex(&oid)); mark_bad_packed_object(p, oid.hash); @@ -1760,7 +1781,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, struct object_id base_oid; revidx = find_pack_revindex(p, obj_offset); if (revidx) { - nth_packed_object_oid(&base_oid, p, revidx->nr); + nth_packed_object_id(&base_oid, p, revidx->nr); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, @@ -1847,36 +1868,27 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32 index_lookup, index_lookup_width, result); } -const unsigned char *nth_packed_object_sha1(struct packed_git *p, - uint32_t n) +int nth_packed_object_id(struct object_id *oid, + struct packed_git *p, + uint32_t n) { const unsigned char *index = p->index_data; const unsigned int hashsz = the_hash_algo->rawsz; if (!index) { if (open_pack_index(p)) - return NULL; + return -1; index = p->index_data; } if (n >= p->num_objects) - return NULL; + return -1; index += 4 * 256; if (p->index_version == 1) { - return index + (hashsz + 4) * n + 4; + oidread(oid, index + (hashsz + 4) * n + 4); } else { index += 8; - return index + hashsz * n; + oidread(oid, index + hashsz * n); } -} - -const struct object_id *nth_packed_object_oid(struct object_id *oid, - struct packed_git *p, - uint32_t n) -{ - const unsigned char *hash = nth_packed_object_sha1(p, n); - if (!hash) - return NULL; - hashcpy(oid->hash, hash); - return oid; + return 0; } void check_pack_index_ptr(const struct packed_git *p, const void *vptr) @@ -2055,7 +2067,7 @@ int for_each_object_in_pack(struct packed_git *p, else pos = i; - if (!nth_packed_object_oid(&oid, p, pos)) + if (nth_packed_object_id(&oid, p, pos) < 0) return error("unable to get sha1 of object %u in %s", pos, p->pack_name); |