summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2018-08-20 15:29:54 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2018-08-20 15:29:54 -0700
commitc00ba2233ef7dcfa478068c75bc4b25a7ac2a0a8 (patch)
tree0c9e688081200f8984a6ac8850483c63c0ca8649
parentGit 2.19-rc0 (diff)
parentmidx: clear midx on repack (diff)
downloadtgif-c00ba2233ef7dcfa478068c75bc4b25a7ac2a0a8.tar.xz
Sync 'ds/multi-pack-index' to v2.19.0-rc0
* ds/multi-pack-index: (23 commits) midx: clear midx on repack packfile: skip loading index if in multi-pack-index midx: prevent duplicate packfile loads midx: use midx in approximate_object_count midx: use existing midx when writing new one midx: use midx in abbreviation calculations midx: read objects from multi-pack-index config: create core.multiPackIndex setting midx: write object offsets midx: write object id fanout chunk midx: write object ids in a chunk midx: sort and deduplicate objects from packfiles midx: read pack names into array multi-pack-index: write pack names in chunk multi-pack-index: read packfile list packfile: generalize pack directory list t5319: expand test data multi-pack-index: load into memory midx: write header information to lockfile multi-pack-index: add 'write' verb ...
-rw-r--r--.gitignore3
-rw-r--r--Documentation/config.txt5
-rw-r--r--Documentation/git-multi-pack-index.txt56
-rw-r--r--Documentation/technical/multi-pack-index.txt109
-rw-r--r--Documentation/technical/pack-format.txt77
-rw-r--r--Makefile3
-rw-r--r--builtin.h1
-rw-r--r--builtin/multi-pack-index.c47
-rw-r--r--builtin/repack.c9
-rw-r--r--command-list.txt1
-rw-r--r--git.c1
-rw-r--r--midx.c918
-rw-r--r--midx.h44
-rw-r--r--object-store.h9
-rw-r--r--packfile.c169
-rw-r--r--packfile.h9
-rw-r--r--sha1-name.c70
-rw-r--r--t/helper/test-read-midx.c51
-rw-r--r--t/helper/test-tool.c1
-rw-r--r--t/helper/test-tool.h1
-rwxr-xr-xt/t5319-multi-pack-index.sh179
21 files changed, 1720 insertions, 43 deletions
diff --git a/.gitignore b/.gitignore
index ffceea7d59..9d1363a1eb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -99,8 +99,9 @@
/git-mergetool--lib
/git-mktag
/git-mktree
-/git-name-rev
+/git-multi-pack-index
/git-mv
+/git-name-rev
/git-notes
/git-p4
/git-pack-redundant
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 1c42364988..8283443c97 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -929,6 +929,11 @@ core.useReplaceRefs::
option was given on the command line. See linkgit:git[1] and
linkgit:git-replace[1] for more information.
+core.multiPackIndex::
+ Use the multi-pack-index file to track multiple packfiles using a
+ single index. See link:technical/multi-pack-index.html[the
+ multi-pack-index design document].
+
core.sparseCheckout::
Enable "sparse checkout" feature. See section "Sparse checkout" in
linkgit:git-read-tree[1] for more information.
diff --git a/Documentation/git-multi-pack-index.txt b/Documentation/git-multi-pack-index.txt
new file mode 100644
index 0000000000..1f97e79912
--- /dev/null
+++ b/Documentation/git-multi-pack-index.txt
@@ -0,0 +1,56 @@
+git-multi-pack-index(1)
+=======================
+
+NAME
+----
+git-multi-pack-index - Write and verify multi-pack-indexes
+
+
+SYNOPSIS
+--------
+[verse]
+'git multi-pack-index' [--object-dir=<dir>] <verb>
+
+DESCRIPTION
+-----------
+Write or verify a multi-pack-index (MIDX) file.
+
+OPTIONS
+-------
+
+--object-dir=<dir>::
+ Use given directory for the location of Git objects. We check
+ `<dir>/pack/multi-pack-index` for the current MIDX file, and
+ `<dir>/pack` for the pack-files to index.
+
+write::
+ When given as the verb, write a new MIDX file to
+ `<dir>/pack/multi-pack-index`.
+
+
+EXAMPLES
+--------
+
+* Write a MIDX file for the packfiles in the current .git folder.
++
+-----------------------------------------------
+$ git multi-pack-index write
+-----------------------------------------------
+
+* Write a MIDX file for the packfiles in an alternate object store.
++
+-----------------------------------------------
+$ git multi-pack-index --object-dir <alt> write
+-----------------------------------------------
+
+
+SEE ALSO
+--------
+See link:technical/multi-pack-index.html[The Multi-Pack-Index Design
+Document] and link:technical/pack-format.html[The Multi-Pack-Index
+Format] for more information on the multi-pack-index feature.
+
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/technical/multi-pack-index.txt b/Documentation/technical/multi-pack-index.txt
new file mode 100644
index 0000000000..d7e57639f7
--- /dev/null
+++ b/Documentation/technical/multi-pack-index.txt
@@ -0,0 +1,109 @@
+Multi-Pack-Index (MIDX) Design Notes
+====================================
+
+The Git object directory contains a 'pack' directory containing
+packfiles (with suffix ".pack") and pack-indexes (with suffix
+".idx"). The pack-indexes provide a way to lookup objects and
+navigate to their offset within the pack, but these must come
+in pairs with the packfiles. This pairing depends on the file
+names, as the pack-index differs only in suffix with its pack-
+file. While the pack-indexes provide fast lookup per packfile,
+this performance degrades as the number of packfiles increases,
+because abbreviations need to inspect every packfile and we are
+more likely to have a miss on our most-recently-used packfile.
+For some large repositories, repacking into a single packfile
+is not feasible due to storage space or excessive repack times.
+
+The multi-pack-index (MIDX for short) stores a list of objects
+and their offsets into multiple packfiles. It contains:
+
+- A list of packfile names.
+- A sorted list of object IDs.
+- A list of metadata for the ith object ID including:
+ - A value j referring to the jth packfile.
+ - An offset within the jth packfile for the object.
+- If large offsets are required, we use another list of large
+ offsets similar to version 2 pack-indexes.
+
+Thus, we can provide O(log N) lookup time for any number
+of packfiles.
+
+Design Details
+--------------
+
+- The MIDX is stored in a file named 'multi-pack-index' in the
+ .git/objects/pack directory. This could be stored in the pack
+ directory of an alternate. It refers only to packfiles in that
+ same directory.
+
+- The core.multiPackIndex config setting must be on to consume MIDX files.
+
+- The file format includes parameters for the object ID hash
+ function, so a future change of hash algorithm does not require
+ a change in format.
+
+- The MIDX keeps only one record per object ID. If an object appears
+ in multiple packfiles, then the MIDX selects the copy in the most-
+ recently modified packfile.
+
+- If there exist packfiles in the pack directory not registered in
+ the MIDX, then those packfiles are loaded into the `packed_git`
+ list and `packed_git_mru` cache.
+
+- The pack-indexes (.idx files) remain in the pack directory so we
+ can delete the MIDX file, set core.multiPackIndex to false, or
+ downgrade without any loss of information.
+
+- The MIDX file format uses a chunk-based approach (similar to the
+ commit-graph file) that allows optional data to be added.
+
+Future Work
+-----------
+
+- Add a 'verify' subcommand to the 'git multi-pack-index' builtin to
+ verify the contents of the multi-pack-index file match the offsets
+ listed in the corresponding pack-indexes.
+
+- The multi-pack-index allows many packfiles, especially in a context
+ where repacking is expensive (such as a very large repo), or
+ unexpected maintenance time is unacceptable (such as a high-demand
+ build machine). However, the multi-pack-index needs to be rewritten
+ in full every time. We can extend the format to be incremental, so
+ writes are fast. By storing a small "tip" multi-pack-index that
+ points to large "base" MIDX files, we can keep writes fast while
+ still reducing the number of binary searches required for object
+ lookups.
+
+- The reachability bitmap is currently paired directly with a single
+ packfile, using the pack-order as the object order to hopefully
+ compress the bitmaps well using run-length encoding. This could be
+ extended to pair a reachability bitmap with a multi-pack-index. If
+ the multi-pack-index is extended to store a "stable object order"
+ (a function Order(hash) = integer that is constant for a given hash,
+ even as the multi-pack-index is updated) then a reachability bitmap
+ could point to a multi-pack-index and be updated independently.
+
+- Packfiles can be marked as "special" using empty files that share
+ the initial name but replace ".pack" with ".keep" or ".promisor".
+ We can add an optional chunk of data to the multi-pack-index that
+ records flags of information about the packfiles. This allows new
+ states, such as 'repacked' or 'redeltified', that can help with
+ pack maintenance in a multi-pack environment. It may also be
+ helpful to organize packfiles by object type (commit, tree, blob,
+ etc.) and use this metadata to help that maintenance.
+
+- The partial clone feature records special "promisor" packs that
+ may point to objects that are not stored locally, but available
+ on request to a server. The multi-pack-index does not currently
+ track these promisor packs.
+
+Related Links
+-------------
+[0] https://bugs.chromium.org/p/git/issues/detail?id=6
+ Chromium work item for: Multi-Pack Index (MIDX)
+
+[1] https://public-inbox.org/git/20180107181459.222909-1-dstolee@microsoft.com/
+ An earlier RFC for the multi-pack-index feature
+
+[2] https://public-inbox.org/git/alpine.DEB.2.20.1803091557510.23109@alexmv-linux/
+ Git Merge 2018 Contributor's summit notes (includes discussion of MIDX)
diff --git a/Documentation/technical/pack-format.txt b/Documentation/technical/pack-format.txt
index 70a99fd142..cab5bdd2ff 100644
--- a/Documentation/technical/pack-format.txt
+++ b/Documentation/technical/pack-format.txt
@@ -252,3 +252,80 @@ Pack file entry: <+
corresponding packfile.
20-byte SHA-1-checksum of all of the above.
+
+== multi-pack-index (MIDX) files have the following format:
+
+The multi-pack-index files refer to multiple pack-files and loose objects.
+
+In order to allow extensions that add extra data to the MIDX, we organize
+the body into "chunks" and provide a lookup table at the beginning of the
+body. The header includes certain length values, such as the number of packs,
+the number of base MIDX files, hash lengths and types.
+
+All 4-byte numbers are in network order.
+
+HEADER:
+
+ 4-byte signature:
+ The signature is: {'M', 'I', 'D', 'X'}
+
+ 1-byte version number:
+ Git only writes or recognizes version 1.
+
+ 1-byte Object Id Version
+ Git only writes or recognizes version 1 (SHA1).
+
+ 1-byte number of "chunks"
+
+ 1-byte number of base multi-pack-index files:
+ This value is currently always zero.
+
+ 4-byte number of pack files
+
+CHUNK LOOKUP:
+
+ (C + 1) * 12 bytes providing the chunk offsets:
+ First 4 bytes describe chunk id. Value 0 is a terminating label.
+ Other 8 bytes provide offset in current file for chunk to start.
+ (Chunks are provided in file-order, so you can infer the length
+ using the next chunk position if necessary.)
+
+ The remaining data in the body is described one chunk at a time, and
+ these chunks may be given in any order. Chunks are required unless
+ otherwise specified.
+
+CHUNK DATA:
+
+ Packfile Names (ID: {'P', 'N', 'A', 'M'})
+ Stores the packfile names as concatenated, null-terminated strings.
+ Packfiles must be listed in lexicographic order for fast lookups by
+ name. This is the only chunk not guaranteed to be a multiple of four
+ bytes in length, so should be the last chunk for alignment reasons.
+
+ OID Fanout (ID: {'O', 'I', 'D', 'F'})
+ The ith entry, F[i], stores the number of OIDs with first
+ byte at most i. Thus F[255] stores the total
+ number of objects.
+
+ OID Lookup (ID: {'O', 'I', 'D', 'L'})
+ The OIDs for all objects in the MIDX are stored in lexicographic
+ order in this chunk.
+
+ Object Offsets (ID: {'O', 'O', 'F', 'F'})
+ Stores two 4-byte values for every object.
+ 1: The pack-int-id for the pack storing this object.
+ 2: The offset within the pack.
+ If all offsets are less than 2^32, then the large offset chunk
+ will not exist and offsets are stored as in IDX v1.
+ If there is at least one offset value larger than 2^32-1, then
+ the large offset chunk must exist. If the large offset chunk
+ exists and the 31st bit is on, then removing that bit reveals
+ the row in the large offsets containing the 8-byte offset of
+ this object.
+
+ [Optional] Object Large Offsets (ID: {'L', 'O', 'F', 'F'})
+ 8-byte offsets into large packfiles.
+
+TRAILER:
+
+ 20-byte SHA1-checksum of the above contents.
diff --git a/Makefile b/Makefile
index d03df31c2a..377379fcc0 100644
--- a/Makefile
+++ b/Makefile
@@ -723,6 +723,7 @@ TEST_BUILTINS_OBJS += test-online-cpus.o
TEST_BUILTINS_OBJS += test-path-utils.o
TEST_BUILTINS_OBJS += test-prio-queue.o
TEST_BUILTINS_OBJS += test-read-cache.o
+TEST_BUILTINS_OBJS += test-read-midx.o
TEST_BUILTINS_OBJS += test-ref-store.o
TEST_BUILTINS_OBJS += test-regex.o
TEST_BUILTINS_OBJS += test-repository.o
@@ -900,6 +901,7 @@ LIB_OBJS += merge.o
LIB_OBJS += merge-blobs.o
LIB_OBJS += merge-recursive.o
LIB_OBJS += mergesort.o
+LIB_OBJS += midx.o
LIB_OBJS += name-hash.o
LIB_OBJS += negotiator/default.o
LIB_OBJS += negotiator/skipping.o
@@ -1060,6 +1062,7 @@ BUILTIN_OBJS += builtin/merge-recursive.o
BUILTIN_OBJS += builtin/merge-tree.o
BUILTIN_OBJS += builtin/mktag.o
BUILTIN_OBJS += builtin/mktree.o
+BUILTIN_OBJS += builtin/multi-pack-index.o
BUILTIN_OBJS += builtin/mv.o
BUILTIN_OBJS += builtin/name-rev.o
BUILTIN_OBJS += builtin/notes.o
diff --git a/builtin.h b/builtin.h
index 99206df4bd..962f0489ab 100644
--- a/builtin.h
+++ b/builtin.h
@@ -191,6 +191,7 @@ extern int cmd_merge_recursive(int argc, const char **argv, const char *prefix);
extern int cmd_merge_tree(int argc, const char **argv, const char *prefix);
extern int cmd_mktag(int argc, const char **argv, const char *prefix);
extern int cmd_mktree(int argc, const char **argv, const char *prefix);
+extern int cmd_multi_pack_index(int argc, const char **argv, const char *prefix);
extern int cmd_mv(int argc, const char **argv, const char *prefix);
extern int cmd_name_rev(int argc, const char **argv, const char *prefix);
extern int cmd_notes(int argc, const char **argv, const char *prefix);
diff --git a/builtin/multi-pack-index.c b/builtin/multi-pack-index.c
new file mode 100644
index 0000000000..6a7aa00cf2
--- /dev/null
+++ b/builtin/multi-pack-index.c
@@ -0,0 +1,47 @@
+#include "builtin.h"
+#include "cache.h"
+#include "config.h"
+#include "parse-options.h"
+#include "midx.h"
+
+/* Usage strings shown by usage_with_options(); NULL-terminated. */
+static char const * const builtin_multi_pack_index_usage[] = {
+ N_("git multi-pack-index [--object-dir=<dir>] write"),
+ NULL
+};
+
+/*
+ * Parsed command-line state. object_dir is filled by --object-dir, or
+ * defaulted to get_object_directory() in cmd_multi_pack_index().
+ */
+static struct opts_multi_pack_index {
+ const char *object_dir;
+} opts;
+
+/*
+ * Entry point for "git multi-pack-index".
+ *
+ * Parses --object-dir, falls back to the repository's object directory
+ * when it was not given, and then dispatches on the single verb. Only
+ * "write" is understood; a missing verb, an unknown verb, or extra
+ * arguments after "write" all print the usage message.
+ */
+int cmd_multi_pack_index(int argc, const char **argv,
+			 const char *prefix)
+{
+	static struct option builtin_multi_pack_index_options[] = {
+		OPT_FILENAME(0, "object-dir", &opts.object_dir,
+		  N_("object directory containing set of packfile and pack-index pairs")),
+		OPT_END(),
+	};
+
+	git_config(git_default_config, NULL);
+
+	argc = parse_options(argc, argv, prefix,
+			     builtin_multi_pack_index_options,
+			     builtin_multi_pack_index_usage, 0);
+
+	if (!opts.object_dir)
+		opts.object_dir = get_object_directory();
+
+	/* Exactly one remaining argument, and it must be the "write" verb. */
+	if (argc == 1 && !strcmp(argv[0], "write"))
+		return write_midx_file(opts.object_dir);
+
+	usage_with_options(builtin_multi_pack_index_usage,
+			   builtin_multi_pack_index_options);
+}
diff --git a/builtin/repack.c b/builtin/repack.c
index d5886039cc..42be88e86c 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -8,6 +8,7 @@
#include "strbuf.h"
#include "string-list.h"
#include "argv-array.h"
+#include "midx.h"
#include "packfile.h"
#include "object-store.h"
@@ -280,6 +281,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
int keep_unreachable = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
int no_update_server_info = 0;
+ int midx_cleared = 0;
struct pack_objects_args po_args = {NULL};
struct option builtin_repack_options[] = {
@@ -418,6 +420,13 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
for_each_string_list_item(item, &names) {
for (ext = 0; ext < ARRAY_SIZE(exts); ext++) {
char *fname, *fname_old;
+
+ if (!midx_cleared) {
+ /* if we move a packfile, it will invalidate the midx */
+ clear_midx_file(get_object_directory());
+ midx_cleared = 1;
+ }
+
fname = mkpathdup("%s/pack-%s%s", packdir,
item->string, exts[ext].name);
if (!file_exists(fname)) {
diff --git a/command-list.txt b/command-list.txt
index a9dda3b8af..c36ea3c182 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -123,6 +123,7 @@ git-merge-index plumbingmanipulators
git-merge-one-file purehelpers
git-mergetool ancillarymanipulators complete
git-merge-tree ancillaryinterrogators
+git-multi-pack-index plumbingmanipulators
git-mktag plumbingmanipulators
git-mktree plumbingmanipulators
git-mv mainporcelain worktree
diff --git a/git.c b/git.c
index c27c38738b..a6f4b44af5 100644
--- a/git.c
+++ b/git.c
@@ -508,6 +508,7 @@ static struct cmd_struct commands[] = {
{ "merge-tree", cmd_merge_tree, RUN_SETUP | NO_PARSEOPT },
{ "mktag", cmd_mktag, RUN_SETUP | NO_PARSEOPT },
{ "mktree", cmd_mktree, RUN_SETUP },
+ { "multi-pack-index", cmd_multi_pack_index, RUN_SETUP_GENTLY },
{ "mv", cmd_mv, RUN_SETUP | NEED_WORK_TREE },
{ "name-rev", cmd_name_rev, RUN_SETUP },
{ "notes", cmd_notes, RUN_SETUP },
diff --git a/midx.c b/midx.c
new file mode 100644
index 0000000000..19b7df338e
--- /dev/null
+++ b/midx.c
@@ -0,0 +1,918 @@
+#include "cache.h"
+#include "config.h"
+#include "csum-file.h"
+#include "dir.h"
+#include "lockfile.h"
+#include "packfile.h"
+#include "object-store.h"
+#include "sha1-lookup.h"
+#include "midx.h"
+
+#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
+#define MIDX_VERSION 1
+#define MIDX_BYTE_FILE_VERSION 4
+#define MIDX_BYTE_HASH_VERSION 5
+#define MIDX_BYTE_NUM_CHUNKS 6
+#define MIDX_BYTE_NUM_PACKS 8
+#define MIDX_HASH_VERSION 1
+#define MIDX_HEADER_SIZE 12
+#define MIDX_HASH_LEN 20
+#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
+
+#define MIDX_MAX_CHUNKS 5
+#define MIDX_CHUNK_ALIGNMENT 4
+#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
+#define MIDX_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
+#define MIDX_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
+#define MIDX_CHUNKID_OBJECTOFFSETS 0x4f4f4646 /* "OOFF" */
+#define MIDX_CHUNKID_LARGEOFFSETS 0x4c4f4646 /* "LOFF" */
+#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
+#define MIDX_CHUNK_FANOUT_SIZE (sizeof(uint32_t) * 256)
+#define MIDX_CHUNK_OFFSET_WIDTH (2 * sizeof(uint32_t))
+#define MIDX_CHUNK_LARGE_OFFSET_WIDTH (sizeof(uint64_t))
+#define MIDX_LARGE_OFFSET_NEEDED 0x80000000
+
+/*
+ * Format the path of the multi-pack-index file that lives in the
+ * "pack" subdirectory of object_dir. The string comes from xstrfmt();
+ * load_multi_pack_index() releases it with free().
+ */
+static char *get_midx_filename(const char *object_dir)
+{
+	char *midx_path = xstrfmt("%s/pack/multi-pack-index", object_dir);
+
+	return midx_path;
+}
+
+/*
+ * Map and parse the multi-pack-index file found under object_dir.
+ *
+ * Returns a newly allocated struct multi_pack_index whose chunk pointers
+ * point into the mapped file, or NULL when the file cannot be opened
+ * (silently), cannot be stat'ed, is too small, has a wrong signature,
+ * version or hash version, has a truncated chunk lookup table, has a
+ * chunk offset outside the file, or lists pack names out of order.
+ * A missing required chunk or an early terminating chunk id is fatal
+ * (die()).
+ *
+ * On failure every resource acquired so far (path string, mapping, file
+ * descriptor, struct and its pack arrays) is released.
+ */
+struct multi_pack_index *load_multi_pack_index(const char *object_dir)
+{
+	struct multi_pack_index *m = NULL;
+	int fd;
+	struct stat st;
+	size_t midx_size;
+	void *midx_map = NULL;
+	uint32_t hash_version;
+	char *midx_name = get_midx_filename(object_dir);
+	uint32_t i;
+	const char *cur_pack_name;
+
+	fd = git_open(midx_name);
+
+	if (fd < 0)
+		goto cleanup_fail;
+	if (fstat(fd, &st)) {
+		error_errno(_("failed to read %s"), midx_name);
+		goto cleanup_fail;
+	}
+
+	midx_size = xsize_t(st.st_size);
+
+	if (midx_size < MIDX_MIN_SIZE) {
+		error(_("multi-pack-index file %s is too small"), midx_name);
+		goto cleanup_fail;
+	}
+
+	FREE_AND_NULL(midx_name);
+
+	midx_map = xmmap(NULL, midx_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+	FLEX_ALLOC_MEM(m, object_dir, object_dir, strlen(object_dir));
+	m->fd = fd;
+	m->data = midx_map;
+	m->data_len = midx_size;
+
+	m->signature = get_be32(m->data);
+	if (m->signature != MIDX_SIGNATURE) {
+		error(_("multi-pack-index signature 0x%08x does not match signature 0x%08x"),
+		      m->signature, MIDX_SIGNATURE);
+		goto cleanup_fail;
+	}
+
+	m->version = m->data[MIDX_BYTE_FILE_VERSION];
+	if (m->version != MIDX_VERSION) {
+		error(_("multi-pack-index version %d not recognized"),
+		      m->version);
+		goto cleanup_fail;
+	}
+
+	hash_version = m->data[MIDX_BYTE_HASH_VERSION];
+	if (hash_version != MIDX_HASH_VERSION) {
+		error(_("hash version %u does not match"), hash_version);
+		goto cleanup_fail;
+	}
+	m->hash_len = MIDX_HASH_LEN;
+
+	m->num_chunks = m->data[MIDX_BYTE_NUM_CHUNKS];
+
+	m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
+
+	/*
+	 * The lookup table holds num_chunks entries plus a terminating
+	 * entry, and the file ends with the checksum trailer. Refuse to
+	 * walk a table that cannot fit in the mapped region.
+	 */
+	if (midx_size < MIDX_HEADER_SIZE +
+			MIDX_CHUNKLOOKUP_WIDTH * ((size_t)m->num_chunks + 1) +
+			MIDX_HASH_LEN) {
+		error(_("multi-pack-index chunk lookup table is truncated"));
+		goto cleanup_fail;
+	}
+
+	for (i = 0; i < m->num_chunks; i++) {
+		uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE +
+					     MIDX_CHUNKLOOKUP_WIDTH * i);
+		uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
+						 MIDX_CHUNKLOOKUP_WIDTH * i);
+
+		/* A chunk must start inside the mapped file. */
+		if (chunk_offset >= m->data_len) {
+			error(_("invalid chunk offset (too large)"));
+			goto cleanup_fail;
+		}
+
+		switch (chunk_id) {
+			case MIDX_CHUNKID_PACKNAMES:
+				m->chunk_pack_names = m->data + chunk_offset;
+				break;
+
+			case MIDX_CHUNKID_OIDFANOUT:
+				m->chunk_oid_fanout = (uint32_t *)(m->data + chunk_offset);
+				break;
+
+			case MIDX_CHUNKID_OIDLOOKUP:
+				m->chunk_oid_lookup = m->data + chunk_offset;
+				break;
+
+			case MIDX_CHUNKID_OBJECTOFFSETS:
+				m->chunk_object_offsets = m->data + chunk_offset;
+				break;
+
+			case MIDX_CHUNKID_LARGEOFFSETS:
+				m->chunk_large_offsets = m->data + chunk_offset;
+				break;
+
+			case 0:
+				die(_("terminating multi-pack-index chunk id appears earlier than expected"));
+				break;
+
+			default:
+				/*
+				 * Do nothing on unrecognized chunks, allowing future
+				 * extensions to add optional chunks.
+				 */
+				break;
+		}
+	}
+
+	if (!m->chunk_pack_names)
+		die(_("multi-pack-index missing required pack-name chunk"));
+	if (!m->chunk_oid_fanout)
+		die(_("multi-pack-index missing required OID fanout chunk"));
+	if (!m->chunk_oid_lookup)
+		die(_("multi-pack-index missing required OID lookup chunk"));
+	if (!m->chunk_object_offsets)
+		die(_("multi-pack-index missing required object offsets chunk"));
+
+	/* The last fanout entry is the total number of objects. */
+	m->num_objects = ntohl(m->chunk_oid_fanout[255]);
+
+	m->pack_names = xcalloc(m->num_packs, sizeof(*m->pack_names));
+	m->packs = xcalloc(m->num_packs, sizeof(*m->packs));
+
+	/* Pack names are stored back to back as NUL-terminated strings. */
+	cur_pack_name = (const char *)m->chunk_pack_names;
+	for (i = 0; i < m->num_packs; i++) {
+		m->pack_names[i] = cur_pack_name;
+
+		cur_pack_name += strlen(cur_pack_name) + 1;
+
+		if (i && strcmp(m->pack_names[i], m->pack_names[i - 1]) <= 0) {
+			error(_("multi-pack-index pack names out of order: '%s' before '%s'"),
+			      m->pack_names[i - 1],
+			      m->pack_names[i]);
+			goto cleanup_fail;
+		}
+	}
+
+	return m;
+
+cleanup_fail:
+	if (m) {
+		/* Both arrays are NULL until allocated above; free(NULL) is a no-op. */
+		free(m->pack_names);
+		free(m->packs);
+	}
+	free(m);
+	free(midx_name);
+	if (midx_map)
+		munmap(midx_map, midx_size);
+	if (0 <= fd)
+		close(fd);
+	return NULL;
+}
+
+/*
+ * Release everything a loaded multi-pack-index holds: the file mapping,
+ * the file descriptor, every packed_git opened through it, and the
+ * packs / pack_names arrays. The struct itself is not freed here.
+ */
+static void close_midx(struct multi_pack_index *m)
+{
+	uint32_t i;
+
+	munmap((unsigned char *)m->data, m->data_len);
+	close(m->fd);
+	m->fd = -1;
+
+	for (i = 0; i < m->num_packs; i++) {
+		if (m->packs[i]) {
+			close_pack(m->packs[i]);
+			/*
+			 * Free the individual pack, not the array: the array
+			 * is still indexed on the next iteration and is
+			 * released once, below.
+			 */
+			free(m->packs[i]);
+		}
+	}
+	FREE_AND_NULL(m->packs);
+	FREE_AND_NULL(m->pack_names);
+}
+
+/*
+ * Make sure m->packs[pack_int_id] refers to an opened packed_git.
+ *
+ * Returns 0 when the slot is already populated or was populated now,
+ * and 1 when add_packed_git() could not produce a pack. An id outside
+ * [0, num_packs) is a programming error.
+ */
+static int prepare_midx_pack(struct multi_pack_index *m, uint32_t pack_int_id)
+{
+	struct strbuf path = STRBUF_INIT;
+	struct packed_git *p;
+
+	if (pack_int_id >= m->num_packs)
+		BUG("bad pack-int-id");
+
+	/* Already loaded on an earlier call. */
+	if (m->packs[pack_int_id])
+		return 0;
+
+	strbuf_addf(&path, "%s/pack/%s", m->object_dir,
+		    m->pack_names[pack_int_id]);
+	p = add_packed_git(path.buf, path.len, 1);
+	strbuf_release(&path);
+
+	m->packs[pack_int_id] = p;
+	return p ? 0 : 1;
+}
+
+/*
+ * Look up oid in the MIDX's sorted OID table via bsearch_hash(), using
+ * the fanout chunk to narrow the range. The return value and *result
+ * are whatever bsearch_hash() produces.
+ */
+int bsearch_midx(const struct object_id *oid, struct multi_pack_index *m, uint32_t *result)
+{
+	const unsigned char *wanted = oid->hash;
+
+	return bsearch_hash(wanted, m->chunk_oid_fanout, m->chunk_oid_lookup,
+			    MIDX_HASH_LEN, result);
+}
+
+/*
+ * Copy the n-th object id of the MIDX (in table order) into *oid.
+ * Returns oid on success, or NULL when n is not below num_objects.
+ */
+struct object_id *nth_midxed_object_oid(struct object_id *oid,
+					struct multi_pack_index *m,
+					uint32_t n)
+{
+	const unsigned char *src;
+
+	if (n >= m->num_objects)
+		return NULL;
+
+	src = m->chunk_oid_lookup + m->hash_len * n;
+	hashcpy(oid->hash, src);
+	return oid;
+}
+
+/*
+ * Return the pack offset recorded for the object at position pos.
+ *
+ * Each object-offsets entry is a pair of 4-byte big-endian values; the
+ * second one is the offset. When a large-offsets chunk exists and the
+ * high bit of that value is set, the remaining bits index an 8-byte
+ * entry in the large-offsets chunk instead.
+ */
+static off_t nth_midxed_offset(struct multi_pack_index *m, uint32_t pos)
+{
+	const unsigned char *offset_data;
+	uint32_t offset32;
+
+	offset_data = m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH;
+	offset32 = get_be32(offset_data + sizeof(uint32_t));
+
+	if (m->chunk_large_offsets && offset32 & MIDX_LARGE_OFFSET_NEEDED) {
+		/*
+		 * The 64-bit value must fit in the return type. Compare
+		 * against off_t, not the 32-bit temporary, which would make
+		 * this check fire on every platform.
+		 */
+		if (sizeof(off_t) < sizeof(uint64_t))
+			die(_("multi-pack-index stores a 64-bit offset, but off_t is too small"));
+
+		offset32 ^= MIDX_LARGE_OFFSET_NEEDED;
+		return get_be64(m->chunk_large_offsets + sizeof(uint64_t) * offset32);
+	}
+
+	return offset32;
+}
+
+/*
+ * Return the pack-int-id stored for the object at position pos: the
+ * first 4-byte big-endian value of its object-offsets entry.
+ */
+static uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos)
+{
+	const unsigned char *entry =
+		m->chunk_object_offsets + pos * MIDX_CHUNK_OFFSET_WIDTH;
+
+	return get_be32(entry);
+}
+
+/*
+ * Fill *e with the pack and offset of the object at position pos.
+ *
+ * Returns 1 when *e was filled, 0 when pos is out of range or the pack
+ * is no longer usable. Failure to open the pack named by the MIDX is
+ * fatal.
+ */
+static int nth_midxed_pack_entry(struct multi_pack_index *m, struct pack_entry *e, uint32_t pos)
+{
+	uint32_t id;
+	struct packed_git *pack;
+
+	if (pos >= m->num_objects)
+		return 0;
+
+	id = nth_midxed_pack_int_id(m, pos);
+	if (prepare_midx_pack(m, id))
+		die(_("error preparing packfile from multi-pack-index"));
+
+	pack = m->packs[id];
+
+	/*
+	 * The packfile may have been removed after the MIDX was loaded,
+	 * so confirm it can still be accessed before handing its location
+	 * to the caller.
+	 */
+	if (!is_pack_valid(pack))
+		return 0;
+
+	e->offset = nth_midxed_offset(m, pos);
+	e->p = pack;
+
+	return 1;
+}
+
+/*
+ * Locate oid in the MIDX and, if present, fill *e with its pack and
+ * offset. Returns 0 when the object is not listed; otherwise returns
+ * the result of nth_midxed_pack_entry().
+ */
+int fill_midx_entry(const struct object_id *oid, struct pack_entry *e, struct multi_pack_index *m)
+{
+	uint32_t pos;
+
+	if (bsearch_midx(oid, m, &pos))
+		return nth_midxed_pack_entry(m, e, pos);
+
+	return 0;
+}
+
+/*
+ * Binary-search the MIDX's pack-name list (kept in strcmp() order) for
+ * idx_name. Returns 1 when an exact match exists, 0 otherwise.
+ */
+int midx_contains_pack(struct multi_pack_index *m, const char *idx_name)
+{
+	uint32_t lo = 0;
+	uint32_t hi = m->num_packs;	/* exclusive upper bound */
+
+	while (lo < hi) {
+		uint32_t mid = lo + (hi - lo) / 2;
+		int cmp = strcmp(idx_name, m->pack_names[mid]);
+
+		if (cmp == 0)
+			return 1;
+
+		if (cmp < 0)
+			hi = mid;
+		else
+			lo = mid + 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Ensure the multi-pack-index for object_dir is part of the list hanging
+ * off r->objects->multi_pack_index.
+ *
+ * Returns 1 when a MIDX for object_dir is already in the list or was
+ * loaded and prepended now. Returns 0 when core.multiPackIndex is unset
+ * or false, or when no MIDX could be loaded. In that case the existing
+ * list is left untouched.
+ */
+int prepare_multi_pack_index_one(struct repository *r, const char *object_dir)
+{
+	struct multi_pack_index *m;
+	struct multi_pack_index *m_search;
+	int config_value;
+
+	if (repo_config_get_bool(r, "core.multipackindex", &config_value) ||
+	    !config_value)
+		return 0;
+
+	for (m_search = r->objects->multi_pack_index; m_search; m_search = m_search->next)
+		if (!strcmp(object_dir, m_search->object_dir))
+			return 1;
+
+	/*
+	 * Only publish the new head once loading succeeded. Storing the
+	 * result unconditionally would replace an already-populated list
+	 * with NULL whenever this directory has no usable MIDX.
+	 */
+	m = load_multi_pack_index(object_dir);
+	if (!m)
+		return 0;
+
+	m->next = r->objects->multi_pack_index;
+	r->objects->multi_pack_index = m;
+	return 1;
+}
+
+/*
+ * Emit the fixed-size MIDX header to f: the 4-byte signature, one byte
+ * each for file version, hash version, chunk count and a zero byte,
+ * then the 4-byte pack count. Returns the number of bytes written
+ * (MIDX_HEADER_SIZE).
+ */
+static size_t write_midx_header(struct hashfile *f,
+				unsigned char num_chunks,
+				uint32_t num_packs)
+{
+	unsigned char version_bytes[4];
+
+	version_bytes[0] = MIDX_VERSION;
+	version_bytes[1] = MIDX_HASH_VERSION;
+	version_bytes[2] = num_chunks;
+	version_bytes[3] = 0; /* unused */
+
+	hashwrite_be32(f, MIDX_SIGNATURE);
+	hashwrite(f, version_bytes, sizeof(version_bytes));
+	hashwrite_be32(f, num_packs);
+
+	return MIDX_HEADER_SIZE;
+}
+
+/*
+ * Accumulator filled by add_pack_to_midx() while scanning a pack
+ * directory.
+ */
+struct pack_list {
+ /* packed_git handles from add_packed_git(), parallel to names[] */
+ struct packed_git **list;
+ /* xstrdup()'ed file names of the accepted ".idx" files */
+ char **names;
+ /* number of valid entries in list[] and names[] */
+ uint32_t nr;
+ /* allocated capacities tracked by ALLOC_GROW for list[] and names[] */
+ uint32_t alloc_list;
+ uint32_t alloc_names;
+ /* sum of strlen(name) + 1 over all accepted names */
+ size_t pack_name_concat_len;
+ /* optional existing MIDX; packs it already contains are skipped */
+ struct multi_pack_index *m;
+};
+
+/*
+ * Directory-scan callback: record one pack-index file in the pack_list
+ * passed through data.
+ *
+ * Files that do not end in ".idx", and packs already listed in the
+ * existing MIDX (packs->m), are ignored. A pack that cannot be added
+ * or whose index cannot be opened produces a warning and is skipped.
+ */
+static void add_pack_to_midx(const char *full_path, size_t full_path_len,
+			     const char *file_name, void *data)
+{
+	struct pack_list *packs = (struct pack_list *)data;
+	struct packed_git *p;
+
+	if (!ends_with(file_name, ".idx"))
+		return;
+
+	if (packs->m && midx_contains_pack(packs->m, file_name))
+		return;
+
+	/* Reserve a slot in both parallel arrays before filling it. */
+	ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list);
+	ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names);
+
+	p = add_packed_git(full_path, full_path_len, 0);
+	packs->list[packs->nr] = p;
+
+	if (!p) {
+		warning(_("failed to add packfile '%s'"),
+			full_path);
+		return;
+	}
+
+	if (open_pack_index(p)) {
+		warning(_("failed to open pack-index '%s'"),
+			full_path);
+		close_pack(p);
+		FREE_AND_NULL(packs->list[packs->nr]);
+		return;
+	}
+
+	/* Commit the slot only once the pack is known to be usable. */
+	packs->names[packs->nr] = xstrdup(file_name);
+	packs->pack_name_concat_len += strlen(file_name) + 1;
+	packs->nr++;
+}
+
+/*
+ * Helper record for sort_packs_by_name(): ties a pack name to the index
+ * it had before sorting.
+ */
+struct pack_pair {
+ /* position of the pack in the unsorted input array */
+ uint32_t pack_int_id;
+ /* borrowed pointer to the pack's file name */
+ char *pack_name;
+};
+
+/* QSORT comparator: order pack_pair records by pack name using strcmp(). */
+static int pack_pair_compare(const void *_a, const void *_b)
+{
+	const struct pack_pair *lhs = _a;
+	const struct pack_pair *rhs = _b;
+
+	return strcmp(lhs->pack_name, rhs->pack_name);
+}
+
+/*
+ * Sort pack_names[0..nr_packs) in place by name and record where each
+ * original entry ended up: after the call, perm[old_index] is the new
+ * index of the pack that used to sit at old_index.
+ */
+static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
+{
+	struct pack_pair *sorted;
+	uint32_t k;
+
+	ALLOC_ARRAY(sorted, nr_packs);
+
+	/* Remember each name's original position before sorting. */
+	for (k = 0; k < nr_packs; k++) {
+		sorted[k].pack_name = pack_names[k];
+		sorted[k].pack_int_id = k;
+	}
+
+	QSORT(sorted, nr_packs, pack_pair_compare);
+
+	/* Write the names back in order and build the old->new mapping. */
+	for (k = 0; k < nr_packs; k++) {
+		perm[sorted[k].pack_int_id] = k;
+		pack_names[k] = sorted[k].pack_name;
+	}
+
+	free(sorted);
+}
+
+/*
+ * One object as seen in one packfile, compared by midx_oid_compare().
+ * NOTE(review): field meanings below are inferred from the names and
+ * the format documentation; the code that fills them is not in view.
+ */
+struct pack_midx_entry {
+ /* object id; primary key in midx_oid_compare() */
+ struct object_id oid;
+ /* presumably the index of the pack holding this copy */
+ uint32_t pack_int_id;
+ /* presumably that pack's modification time */
+ time_t pack_mtime;
+ /* presumably the object's offset within that pack */
+ uint64_t offset;
+};
+
+static int midx_oid_compare(const void *_a, const void *_b)
+{
+ const struct pack_midx_entry *a = (const struct pack_midx_entry *)_a;
+ const struct pack_midx_entry *b = (const struct pack_midx_entry *)_b;
+ int cmp = oidcmp(&a->oid, &b->oid);
+
+ if (cmp)