summaryrefslogtreecommitdiff
path: root/packfile.c
diff options
context:
space:
mode:
Diffstat (limited to 'packfile.c')
-rw-r--r--packfile.c130
1 files changed, 71 insertions, 59 deletions
diff --git a/packfile.c b/packfile.c
index f0dc63e92f..6ab5233613 100644
--- a/packfile.c
+++ b/packfile.c
@@ -178,6 +178,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
*/
(sizeof(off_t) <= 4))
return error("pack too large for current definition of off_t in %s", path);
+ p->crc_offset = 8 + 4 * 256 + nr * hashsz;
}
p->index_version = version;
@@ -510,7 +511,6 @@ static int open_packed_git_1(struct packed_git *p)
struct pack_header hdr;
unsigned char hash[GIT_MAX_RAWSZ];
unsigned char *idx_hash;
- long fd_flag;
ssize_t read_result;
const unsigned hashsz = the_hash_algo->rawsz;
@@ -554,16 +554,6 @@ static int open_packed_git_1(struct packed_git *p)
} else if (p->pack_size != st.st_size)
return error("packfile %s size changed", p->pack_name);
- /* We leave these file descriptors open with sliding mmap;
- * there is no point keeping them open across exec(), though.
- */
- fd_flag = fcntl(p->pack_fd, F_GETFD, 0);
- if (fd_flag < 0)
- return error("cannot determine file descriptor flags");
- fd_flag |= FD_CLOEXEC;
- if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1)
- return error("cannot set FD_CLOEXEC");
-
/* Verify we recognize this pack file format. */
read_result = read_in_full(p->pack_fd, &hdr, sizeof(hdr));
if (read_result < 0)
@@ -587,9 +577,8 @@ static int open_packed_git_1(struct packed_git *p)
" while index indicates %"PRIu32" objects",
p->pack_name, ntohl(hdr.hdr_entries),
p->num_objects);
- if (lseek(p->pack_fd, p->pack_size - hashsz, SEEK_SET) == -1)
- return error("end of packfile %s is unavailable", p->pack_name);
- read_result = read_in_full(p->pack_fd, hash, hashsz);
+ read_result = pread_in_full(p->pack_fd, hash, hashsz,
+ p->pack_size - hashsz);
if (read_result < 0)
return error_errno("error reading from %s", p->pack_name);
if (read_result != hashsz)
@@ -1016,12 +1005,14 @@ void reprepare_packed_git(struct repository *r)
{
struct object_directory *odb;
+ obj_read_lock();
for (odb = r->objects->odb; odb; odb = odb->next)
odb_clear_loose_cache(odb);
r->objects->approximate_object_count_valid = 0;
r->objects->packed_git_initialized = 0;
prepare_packed_git(r);
+ obj_read_unlock();
}
struct packed_git *get_packed_git(struct repository *r)
@@ -1098,7 +1089,23 @@ unsigned long get_size_from_delta(struct packed_git *p,
do {
in = use_pack(p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
+ /*
+ * Note: the window section returned by use_pack() must be
+ * available throughout git_inflate()'s unlocked execution. To
+ * ensure no other thread will modify the window in the
+ * meantime, we rely on the packed_window.inuse_cnt. This
+ * counter is incremented before window reading and checked
+ * before window disposal.
+ *
+ * Other worrying sections could be the call to close_pack_fd(),
+ * which can close packs even with in-use windows, and to
+ * reprepare_packed_git(). Regarding the former, mmap doc says:
+ * "closing the file descriptor does not unmap the region". And
+ * for the latter, it won't re-open already available packs.
+ */
+ obj_read_unlock();
st = git_inflate(&stream, Z_FINISH);
+ obj_read_lock();
curpos += stream.next_in - in;
} while ((st == Z_OK || st == Z_BUF_ERROR) &&
stream.total_out < sizeof(delta_head));
@@ -1174,11 +1181,11 @@ const struct packed_git *has_packed_and_bad(struct repository *r,
return NULL;
}
-static off_t get_delta_base(struct packed_git *p,
- struct pack_window **w_curs,
- off_t *curpos,
- enum object_type type,
- off_t delta_obj_offset)
+off_t get_delta_base(struct packed_git *p,
+ struct pack_window **w_curs,
+ off_t *curpos,
+ enum object_type type,
+ off_t delta_obj_offset)
{
unsigned char *base_info = use_pack(p, w_curs, *curpos, NULL);
off_t base_offset;
@@ -1219,30 +1226,32 @@ static off_t get_delta_base(struct packed_git *p,
* the final object lookup), but more expensive for OFS deltas (we
* have to load the revidx to convert the offset back into a sha1).
*/
-static const unsigned char *get_delta_base_sha1(struct packed_git *p,
- struct pack_window **w_curs,
- off_t curpos,
- enum object_type type,
- off_t delta_obj_offset)
+static int get_delta_base_oid(struct packed_git *p,
+ struct pack_window **w_curs,
+ off_t curpos,
+ struct object_id *oid,
+ enum object_type type,
+ off_t delta_obj_offset)
{
if (type == OBJ_REF_DELTA) {
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
- return base;
+ oidread(oid, base);
+ return 0;
} else if (type == OBJ_OFS_DELTA) {
struct revindex_entry *revidx;
off_t base_offset = get_delta_base(p, w_curs, &curpos,
type, delta_obj_offset);
if (!base_offset)
- return NULL;
+ return -1;
revidx = find_pack_revindex(p, base_offset);
if (!revidx)
- return NULL;
+ return -1;
- return nth_packed_object_sha1(p, revidx->nr);
+ return nth_packed_object_id(oid, p, revidx->nr);
} else
- return NULL;
+ return -1;
}
static int retry_bad_packed_offset(struct repository *r,
@@ -1255,7 +1264,7 @@ static int retry_bad_packed_offset(struct repository *r,
revidx = find_pack_revindex(p, obj_offset);
if (!revidx)
return OBJ_BAD;
- nth_packed_object_oid(&oid, p, revidx->nr);
+ nth_packed_object_id(&oid, p, revidx->nr);
mark_bad_packed_object(p, oid.hash);
type = oid_object_info(r, &oid, NULL);
if (type <= OBJ_NONE)
@@ -1457,6 +1466,14 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
struct list_head *lru, *tmp;
+ /*
+ * Check required to avoid redundant entries when more than one thread
+ * is unpacking the same object, in unpack_entry() (since its phases I
+ * and III might run concurrently across multiple threads).
+ */
+ if (in_delta_base_cache(p, base_offset))
+ return;
+
delta_base_cached += base_size;
list_for_each_safe(lru, tmp, &delta_base_cache_lru) {
@@ -1542,20 +1559,16 @@ int packed_object_info(struct repository *r, struct packed_git *p,
}
}
- if (oi->delta_base_sha1) {
+ if (oi->delta_base_oid) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
- const unsigned char *base;
-
- base = get_delta_base_sha1(p, &w_curs, curpos,
- type, obj_offset);
- if (!base) {
+ if (get_delta_base_oid(p, &w_curs, curpos,
+ oi->delta_base_oid,
+ type, obj_offset) < 0) {
type = OBJ_BAD;
goto out;
}
-
- hashcpy(oi->delta_base_sha1, base);
} else
- hashclr(oi->delta_base_sha1);
+ oidclr(oi->delta_base_oid);
}
oi->whence = in_delta_base_cache(p, obj_offset) ? OI_DBCACHED :
@@ -1586,7 +1599,15 @@ static void *unpack_compressed_entry(struct packed_git *p,
do {
in = use_pack(p, w_curs, curpos, &stream.avail_in);
stream.next_in = in;
+ /*
+ * Note: we must ensure the window section returned by
+ * use_pack() will be available throughout git_inflate()'s
+ * unlocked execution. Please refer to the comment at
+ * get_size_from_delta() to see how this is done.
+ */
+ obj_read_unlock();
st = git_inflate(&stream, Z_FINISH);
+ obj_read_lock();
if (!stream.avail_out)
break; /* the payload is larger than it should be */
curpos += stream.next_in - in;
@@ -1671,7 +1692,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
off_t len = revidx[1].offset - obj_offset;
if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
struct object_id oid;
- nth_packed_object_oid(&oid, p, revidx->nr);
+ nth_packed_object_id(&oid, p, revidx->nr);
error("bad packed object CRC for %s",
oid_to_hex(&oid));
mark_bad_packed_object(p, oid.hash);
@@ -1760,7 +1781,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
struct object_id base_oid;
revidx = find_pack_revindex(p, obj_offset);
if (revidx) {
- nth_packed_object_oid(&base_oid, p, revidx->nr);
+ nth_packed_object_id(&base_oid, p, revidx->nr);
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
oid_to_hex(&base_oid), (uintmax_t)obj_offset,
@@ -1847,36 +1868,27 @@ int bsearch_pack(const struct object_id *oid, const struct packed_git *p, uint32
index_lookup, index_lookup_width, result);
}
-const unsigned char *nth_packed_object_sha1(struct packed_git *p,
- uint32_t n)
+int nth_packed_object_id(struct object_id *oid,
+ struct packed_git *p,
+ uint32_t n)
{
const unsigned char *index = p->index_data;
const unsigned int hashsz = the_hash_algo->rawsz;
if (!index) {
if (open_pack_index(p))
- return NULL;
+ return -1;
index = p->index_data;
}
if (n >= p->num_objects)
- return NULL;
+ return -1;
index += 4 * 256;
if (p->index_version == 1) {
- return index + (hashsz + 4) * n + 4;
+ oidread(oid, index + (hashsz + 4) * n + 4);
} else {
index += 8;
- return index + hashsz * n;
+ oidread(oid, index + hashsz * n);
}
-}
-
-const struct object_id *nth_packed_object_oid(struct object_id *oid,
- struct packed_git *p,
- uint32_t n)
-{
- const unsigned char *hash = nth_packed_object_sha1(p, n);
- if (!hash)
- return NULL;
- hashcpy(oid->hash, hash);
- return oid;
+ return 0;
}
void check_pack_index_ptr(const struct packed_git *p, const void *vptr)
@@ -2055,7 +2067,7 @@ int for_each_object_in_pack(struct packed_git *p,
else
pos = i;
- if (!nth_packed_object_oid(&oid, p, pos))
+ if (nth_packed_object_id(&oid, p, pos) < 0)
return error("unable to get sha1 of object %u in %s",
pos, p->pack_name);