summaryrefslogtreecommitdiff
path: root/packfile.c
diff options
context:
space:
mode:
Diffstat (limited to 'packfile.c')
-rw-r--r--packfile.c257
1 files changed, 187 insertions, 70 deletions
diff --git a/packfile.c b/packfile.c
index f4e752996d..8668345d93 100644
--- a/packfile.c
+++ b/packfile.c
@@ -7,7 +7,7 @@
#include "packfile.h"
#include "delta.h"
#include "streaming.h"
-#include "sha1-lookup.h"
+#include "hash-lookup.h"
#include "commit.h"
#include "object.h"
#include "tag.h"
@@ -148,7 +148,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
* - hash of the packfile
* - file checksum
*/
- if (idx_size != 4 * 256 + nr * (hashsz + 4) + hashsz + hashsz)
+ if (idx_size != st_add(4 * 256 + hashsz + hashsz, st_mult(nr, hashsz + 4)))
return error("wrong index v1 file size in %s", path);
} else if (version == 2) {
/*
@@ -164,10 +164,10 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
* variable sized table containing 8-byte entries
* for offsets larger than 2^31.
*/
- unsigned long min_size = 8 + 4*256 + nr*(hashsz + 4 + 4) + hashsz + hashsz;
- unsigned long max_size = min_size;
+ size_t min_size = st_add(8 + 4*256 + hashsz + hashsz, st_mult(nr, hashsz + 4 + 4));
+ size_t max_size = min_size;
if (nr)
- max_size += (nr - 1)*8;
+ max_size = st_add(max_size, st_mult(nr - 1, 8));
if (idx_size < min_size || idx_size > max_size)
return error("wrong index v2 file size in %s", path);
if (idx_size != min_size &&
@@ -178,6 +178,7 @@ int load_idx(const char *path, const unsigned int hashsz, void *idx_map,
*/
(sizeof(off_t) <= 4))
return error("pack too large for current definition of off_t in %s", path);
+ p->crc_offset = 8 + 4 * 256 + nr * hashsz;
}
p->index_version = version;
@@ -323,11 +324,21 @@ void close_pack_index(struct packed_git *p)
}
}
+void close_pack_revindex(struct packed_git *p) {
+ if (!p->revindex_map)
+ return;
+
+ munmap((void *)p->revindex_map, p->revindex_size);
+ p->revindex_map = NULL;
+ p->revindex_data = NULL;
+}
+
void close_pack(struct packed_git *p)
{
close_pack_windows(p);
close_pack_fd(p);
close_pack_index(p);
+ close_pack_revindex(p);
}
void close_object_store(struct raw_object_store *o)
@@ -350,7 +361,7 @@ void close_object_store(struct raw_object_store *o)
void unlink_pack_path(const char *pack_name, int force_delete)
{
- static const char *exts[] = {".pack", ".idx", ".keep", ".bitmap", ".promisor"};
+ static const char *exts[] = {".pack", ".idx", ".rev", ".keep", ".bitmap", ".promisor"};
int i;
struct strbuf buf = STRBUF_INIT;
size_t plen;
@@ -513,19 +524,8 @@ static int open_packed_git_1(struct packed_git *p)
ssize_t read_result;
const unsigned hashsz = the_hash_algo->rawsz;
- if (!p->index_data) {
- struct multi_pack_index *m;
- const char *pack_name = pack_basename(p);
-
- for (m = the_repository->objects->multi_pack_index;
- m; m = m->next) {
- if (midx_contains_pack(m, pack_name))
- break;
- }
-
- if (!m && open_pack_index(p))
- return error("packfile %s index unavailable", p->pack_name);
- }
+ if (open_pack_index(p))
+ return error("packfile %s index unavailable", p->pack_name);
if (!pack_max_fds) {
unsigned int max_fds = get_max_fd_limit();
@@ -566,10 +566,6 @@ static int open_packed_git_1(struct packed_git *p)
" supported (try upgrading GIT to a newer version)",
p->pack_name, ntohl(hdr.hdr_version));
- /* Skip index checking if in multi-pack-index */
- if (!p->index_data)
- return 0;
-
/* Verify the pack matches its index. */
if (p->num_objects != ntohl(hdr.hdr_entries))
return error("packfile %s claims to have %"PRIu32" objects"
@@ -642,7 +638,7 @@ unsigned char *use_pack(struct packed_git *p,
if (p->pack_fd == -1 && open_packed_git(p))
die("packfile %s cannot be accessed", p->pack_name);
- win = xcalloc(1, sizeof(*win));
+ CALLOC_ARRAY(win, 1);
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
if (len > packed_git_window_size)
@@ -866,7 +862,11 @@ static void prepare_pack(const char *full_name, size_t full_name_len,
if (!strcmp(file_name, "multi-pack-index"))
return;
+ if (starts_with(file_name, "multi-pack-index") &&
+ ends_with(file_name, ".rev"))
+ return;
if (ends_with(file_name, ".idx") ||
+ ends_with(file_name, ".rev") ||
ends_with(file_name, ".pack") ||
ends_with(file_name, ".bitmap") ||
ends_with(file_name, ".keep") ||
@@ -922,6 +922,7 @@ unsigned long repo_approximate_object_count(struct repository *r)
count += p->num_objects;
}
r->objects->approximate_object_count = count;
+ r->objects->approximate_object_count_valid = 1;
}
return r->objects->approximate_object_count;
}
@@ -1026,6 +1027,17 @@ struct multi_pack_index *get_multi_pack_index(struct repository *r)
return r->objects->multi_pack_index;
}
+struct multi_pack_index *get_local_multi_pack_index(struct repository *r)
+{
+ struct multi_pack_index *m = get_multi_pack_index(r);
+
+ /* no need to iterate; we always put the local one first (if any) */
+ if (m && m->local)
+ return m;
+
+ return NULL;
+}
+
struct packed_git *get_all_packs(struct repository *r)
{
struct multi_pack_index *m;
@@ -1237,18 +1249,18 @@ static int get_delta_base_oid(struct packed_git *p,
oidread(oid, base);
return 0;
} else if (type == OBJ_OFS_DELTA) {
- struct revindex_entry *revidx;
+ uint32_t base_pos;
off_t base_offset = get_delta_base(p, w_curs, &curpos,
type, delta_obj_offset);
if (!base_offset)
return -1;
- revidx = find_pack_revindex(p, base_offset);
- if (!revidx)
+ if (offset_to_pack_pos(p, base_offset, &base_pos) < 0)
return -1;
- return nth_packed_object_id(oid, p, revidx->nr);
+ return nth_packed_object_id(oid, p,
+ pack_pos_to_index(p, base_pos));
} else
return -1;
}
@@ -1258,12 +1270,11 @@ static int retry_bad_packed_offset(struct repository *r,
off_t obj_offset)
{
int type;
- struct revindex_entry *revidx;
+ uint32_t pos;
struct object_id oid;
- revidx = find_pack_revindex(p, obj_offset);
- if (!revidx)
+ if (offset_to_pack_pos(p, obj_offset, &pos) < 0)
return OBJ_BAD;
- nth_packed_object_id(&oid, p, revidx->nr);
+ nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos));
mark_bad_packed_object(p, oid.hash);
type = oid_object_info(r, &oid, NULL);
if (type <= OBJ_NONE)
@@ -1462,7 +1473,7 @@ void clear_delta_base_cache(void)
static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
void *base, unsigned long base_size, enum object_type type)
{
- struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
+ struct delta_base_cache_entry *ent;
struct list_head *lru, *tmp;
/*
@@ -1470,8 +1481,10 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
* is unpacking the same object, in unpack_entry() (since its phases I
* and III might run concurrently across multiple threads).
*/
- if (in_delta_base_cache(p, base_offset))
+ if (in_delta_base_cache(p, base_offset)) {
+ free(base);
return;
+ }
delta_base_cached += base_size;
@@ -1483,6 +1496,7 @@ static void add_delta_base_cache(struct packed_git *p, off_t base_offset,
release_delta_base_cache(f);
}
+ ent = xmalloc(sizeof(*ent));
ent->key.p = p;
ent->key.base_offset = base_offset;
ent->type = type;
@@ -1537,8 +1551,15 @@ int packed_object_info(struct repository *r, struct packed_git *p,
}
if (oi->disk_sizep) {
- struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
- *oi->disk_sizep = revidx[1].offset - obj_offset;
+ uint32_t pos;
+ if (offset_to_pack_pos(p, obj_offset, &pos) < 0) {
+ error("could not find object at offset %"PRIuMAX" "
+ "in pack %s", (uintmax_t)obj_offset, p->pack_name);
+ type = OBJ_BAD;
+ goto out;
+ }
+
+ *oi->disk_sizep = pack_pos_to_offset(p, pos + 1) - obj_offset;
}
if (oi->typep || oi->type_name) {
@@ -1687,11 +1708,21 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
}
if (do_check_packed_object_crc && p->index_version > 1) {
- struct revindex_entry *revidx = find_pack_revindex(p, obj_offset);
- off_t len = revidx[1].offset - obj_offset;
- if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) {
+ uint32_t pack_pos, index_pos;
+ off_t len;
+
+ if (offset_to_pack_pos(p, obj_offset, &pack_pos) < 0) {
+ error("could not find object at offset %"PRIuMAX" in pack %s",
+ (uintmax_t)obj_offset, p->pack_name);
+ data = NULL;
+ goto out;
+ }
+
+ len = pack_pos_to_offset(p, pack_pos + 1) - obj_offset;
+ index_pos = pack_pos_to_index(p, pack_pos);
+ if (check_pack_crc(p, &w_curs, obj_offset, len, index_pos)) {
struct object_id oid;
- nth_packed_object_id(&oid, p, revidx->nr);
+ nth_packed_object_id(&oid, p, index_pos);
error("bad packed object CRC for %s",
oid_to_hex(&oid));
mark_bad_packed_object(p, oid.hash);
@@ -1763,12 +1794,10 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
void *external_base = NULL;
unsigned long delta_size, base_size = size;
int i;
+ off_t base_obj_offset = obj_offset;
data = NULL;
- if (base)
- add_delta_base_cache(p, obj_offset, base, base_size, type);
-
if (!base) {
/*
* We're probably in deep shit, but let's try to fetch
@@ -1776,11 +1805,11 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
* This is costly but should happen only in the presence
* of a corrupted pack, and is better than failing outright.
*/
- struct revindex_entry *revidx;
+ uint32_t pos;
struct object_id base_oid;
- revidx = find_pack_revindex(p, obj_offset);
- if (revidx) {
- nth_packed_object_id(&base_oid, p, revidx->nr);
+ if (!(offset_to_pack_pos(p, obj_offset, &pos))) {
+ nth_packed_object_id(&base_oid, p,
+ pack_pos_to_index(p, pos));
error("failed to read delta base object %s"
" at offset %"PRIuMAX" from %s",
oid_to_hex(&base_oid), (uintmax_t)obj_offset,
@@ -1806,24 +1835,33 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset,
"at offset %"PRIuMAX" from %s",
(uintmax_t)curpos, p->pack_name);
data = NULL;
- free(external_base);
- continue;
- }
+ } else {
+ data = patch_delta(base, base_size, delta_data,
+ delta_size, &size);
- data = patch_delta(base, base_size,
- delta_data, delta_size,
- &size);
+ /*
+ * We could not apply the delta; warn the user, but
+ * keep going. Our failure will be noticed either in
+ * the next iteration of the loop, or if this is the
+ * final delta, in the caller when we return NULL.
+ * Those code paths will take care of making a more
+ * explicit warning and retrying with another copy of
+ * the object.
+ */
+ if (!data)
+ error("failed to apply delta");
+ }
/*
- * We could not apply the delta; warn the user, but keep going.
- * Our failure will be noticed either in the next iteration of
- * the loop, or if this is the final delta, in the caller when
- * we return NULL. Those code paths will take care of making
- * a more explicit warning and retrying with another copy of
- * the object.
+ * We delay adding `base` to the cache until the end of the loop
+ * because unpack_compressed_entry() momentarily releases the
+ * obj_read_mutex, giving another thread the chance to access
+ * the cache. Therefore, if `base` was already there, this other
+ * thread could free() it (e.g. to make space for another entry)
+ * before we are done using it.
*/
- if (!data)
- error("failed to apply delta");
+ if (!external_base)
+ add_delta_base_cache(p, base_obj_offset, base, base_size, type);
free(delta_data);
free(external_base);
@@ -1910,14 +1948,14 @@ off_t nth_packed_object_offset(const struct packed_git *p, uint32_t n)
const unsigned int hashsz = the_hash_algo->rawsz;
index += 4 * 256;
if (p->index_version == 1) {
- return ntohl(*((uint32_t *)(index + (hashsz + 4) * n)));
+ return ntohl(*((uint32_t *)(index + (hashsz + 4) * (size_t)n)));
} else {
uint32_t off;
- index += 8 + p->num_objects * (hashsz + 4);
+ index += 8 + (size_t)p->num_objects * (hashsz + 4);
off = ntohl(*((uint32_t *)(index + 4 * n)));
if (!(off & 0x80000000))
return off;
- index += p->num_objects * 4 + (off & 0x7fffffff) * 8;
+ index += (size_t)p->num_objects * 4 + (off & 0x7fffffff) * 8;
check_pack_index_ptr(p, index);
return get_be64(index);
}
@@ -2031,12 +2069,79 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
return 0;
}
+static void maybe_invalidate_kept_pack_cache(struct repository *r,
+ unsigned flags)
+{
+ if (!r->objects->kept_pack_cache.packs)
+ return;
+ if (r->objects->kept_pack_cache.flags == flags)
+ return;
+ FREE_AND_NULL(r->objects->kept_pack_cache.packs);
+ r->objects->kept_pack_cache.flags = 0;
+}
+
+static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
+{
+ maybe_invalidate_kept_pack_cache(r, flags);
+
+ if (!r->objects->kept_pack_cache.packs) {
+ struct packed_git **packs = NULL;
+ size_t nr = 0, alloc = 0;
+ struct packed_git *p;
+
+ /*
+ * We want "all" packs here, because we need to cover ones that
+ * are used by a midx, as well. We need to look in every one of
+ * them (instead of the midx itself) to cover duplicates. It's
+ * possible that an object is found in two packs that the midx
+ * covers, one kept and one not kept, but the midx returns only
+ * the non-kept version.
+ */
+ for (p = get_all_packs(r); p; p = p->next) {
+ if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) ||
+ (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) {
+ ALLOC_GROW(packs, nr + 1, alloc);
+ packs[nr++] = p;
+ }
+ }
+ ALLOC_GROW(packs, nr + 1, alloc);
+ packs[nr] = NULL;
+
+ r->objects->kept_pack_cache.packs = packs;
+ r->objects->kept_pack_cache.flags = flags;
+ }
+
+ return r->objects->kept_pack_cache.packs;
+}
+
+int find_kept_pack_entry(struct repository *r,
+ const struct object_id *oid,
+ unsigned flags,
+ struct pack_entry *e)
+{
+ struct packed_git **cache;
+
+ for (cache = kept_pack_cache(r, flags); *cache; cache++) {
+ struct packed_git *p = *cache;
+ if (fill_pack_entry(oid, e, p))
+ return 1;
+ }
+
+ return 0;
+}
+
int has_object_pack(const struct object_id *oid)
{
struct pack_entry e;
return find_pack_entry(the_repository, oid, &e);
}
+int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+{
+ struct pack_entry e;
+ return find_kept_pack_entry(the_repository, oid, flags, &e);
+}
+
int has_pack_index(const unsigned char *sha1)
{
struct stat st;
@@ -2058,19 +2163,31 @@ int for_each_object_in_pack(struct packed_git *p,
}
for (i = 0; i < p->num_objects; i++) {
- uint32_t pos;
+ uint32_t index_pos;
struct object_id oid;
+ /*
+ * We are iterating "i" from 0 up to num_objects, but its
+ * meaning may be different, depending on the requested output
+ * order:
+ *
+ * - in object-name order, it is the same as the index order
+ * used by nth_packed_object_id(), so we can pass it
+ * directly
+ *
+ * - in pack-order, it is pack position, which we must
+ * convert to an index position in order to get the oid.
+ */
if (flags & FOR_EACH_OBJECT_PACK_ORDER)
- pos = p->revindex[i].nr;
+ index_pos = pack_pos_to_index(p, i);
else
- pos = i;
+ index_pos = i;
- if (nth_packed_object_id(&oid, p, pos) < 0)
+ if (nth_packed_object_id(&oid, p, index_pos) < 0)
return error("unable to get sha1 of object %u in %s",
- pos, p->pack_name);
+ index_pos, p->pack_name);
- r = cb(&oid, p, pos, data);
+ r = cb(&oid, p, index_pos, data);
if (r)
break;
}