summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2018-05-08 15:59:20 +0900
committerLibravatar Junio C Hamano <gitster@pobox.com>2018-05-08 15:59:20 +0900
commitb10edb2df55241b2e042b3d5473537904d09d193 (patch)
tree0e456f3d65f1268c364a8d6d2b463456f23e222a
parentMerge branch 'js/empty-config-section-fix' (diff)
parentcommit-graph: implement "--append" option (diff)
downloadtgif-b10edb2df55241b2e042b3d5473537904d09d193.tar.xz
Merge branch 'ds/commit-graph'
Precompute and store information necessary for ancestry traversal in a separate file to optimize graph walking. * ds/commit-graph: commit-graph: implement "--append" option commit-graph: build graph from starting commits commit-graph: read only from specific pack-indexes commit: integrate commit graph with commit parsing commit-graph: close under reachability commit-graph: add core.commitGraph setting commit-graph: implement git commit-graph read commit-graph: implement git-commit-graph write commit-graph: implement write_commit_graph() commit-graph: create git-commit-graph builtin graph: add commit graph design document commit-graph: add format document csum-file: refactor finalize_hashfile() method csum-file: rename hashclose() to finalize_hashfile()
-rw-r--r--.gitignore1
-rw-r--r--Documentation/config.txt4
-rw-r--r--Documentation/git-commit-graph.txt94
-rw-r--r--Documentation/technical/commit-graph-format.txt97
-rw-r--r--Documentation/technical/commit-graph.txt163
-rw-r--r--Makefile2
-rw-r--r--alloc.c1
-rw-r--r--builtin.h1
-rw-r--r--builtin/commit-graph.c171
-rw-r--r--builtin/index-pack.c2
-rw-r--r--builtin/pack-objects.c6
-rw-r--r--bulk-checkin.c4
-rw-r--r--cache.h1
-rw-r--r--command-list.txt1
-rw-r--r--commit-graph.c741
-rw-r--r--commit-graph.h46
-rw-r--r--commit.c3
-rw-r--r--commit.h3
-rw-r--r--config.c5
-rw-r--r--contrib/completion/git-completion.bash2
-rw-r--r--csum-file.c10
-rw-r--r--csum-file.h9
-rw-r--r--environment.c1
-rw-r--r--fast-import.c2
-rw-r--r--git.c1
-rw-r--r--pack-bitmap-write.c2
-rw-r--r--pack-write.c5
-rw-r--r--packfile.c4
-rw-r--r--packfile.h2
-rwxr-xr-xt/t5318-commit-graph.sh224
30 files changed, 1587 insertions, 21 deletions
diff --git a/.gitignore b/.gitignore
index 8d9a458555..b2a1ae4a1d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -35,6 +35,7 @@
/git-clone
/git-column
/git-commit
+/git-commit-graph
/git-commit-tree
/git-config
/git-count-objects
diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb3..cb4c93f74a 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -898,6 +898,10 @@ core.notesRef::
This setting defaults to "refs/notes/commits", and it can be overridden by
the `GIT_NOTES_REF` environment variable. See linkgit:git-notes[1].
+core.commitGraph::
+ Enable git commit graph feature. Allows reading from the
+ commit-graph file.
+
core.sparseCheckout::
Enable "sparse checkout" feature. See section "Sparse checkout" in
linkgit:git-read-tree[1] for more information.
diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
new file mode 100644
index 0000000000..4c97b555cc
--- /dev/null
+++ b/Documentation/git-commit-graph.txt
@@ -0,0 +1,94 @@
+git-commit-graph(1)
+===================
+
+NAME
+----
+git-commit-graph - Write and verify Git commit graph files
+
+
+SYNOPSIS
+--------
+[verse]
+'git commit-graph read' [--object-dir <dir>]
+'git commit-graph write' <options> [--object-dir <dir>]
+
+
+DESCRIPTION
+-----------
+
+Manage the serialized commit graph file.
+
+
+OPTIONS
+-------
+--object-dir::
+ Use given directory for the location of packfiles and commit graph
+ file. This parameter exists to specify the location of an alternate
+ that only has the objects directory, not a full .git directory. The
+ commit graph file is expected to be at <dir>/info/commit-graph and
+ the packfiles are expected to be in <dir>/pack.
+
+
+COMMANDS
+--------
+'write'::
+
+Write a commit graph file based on the commits found in packfiles.
++
+With the `--stdin-packs` option, generate the new commit graph by
+walking objects only in the specified pack-indexes. (Cannot be combined
+with --stdin-commits.)
++
+With the `--stdin-commits` option, generate the new commit graph by
+walking commits starting at the commits specified in stdin as a list
+of OIDs in hex, one OID per line. (Cannot be combined with
+--stdin-packs.)
++
+With the `--append` option, include all commits that are present in the
+existing commit-graph file.
+
+'read'::
+
+Read a graph file given by the commit-graph file and output basic
+details about the graph file. Used for debugging purposes.
+
+
+EXAMPLES
+--------
+
+* Write a commit graph file for the packed commits in your local .git folder.
++
+------------------------------------------------
+$ git commit-graph write
+------------------------------------------------
+
+* Write a graph file, extending the current graph file using commits
+* in <pack-index>.
++
+------------------------------------------------
+$ echo <pack-index> | git commit-graph write --stdin-packs
+------------------------------------------------
+
+* Write a graph file containing all reachable commits.
++
+------------------------------------------------
+$ git show-ref -s | git commit-graph write --stdin-commits
+------------------------------------------------
+
+* Write a graph file containing all commits in the current
+* commit-graph file along with those reachable from HEAD.
++
+------------------------------------------------
+$ git rev-parse HEAD | git commit-graph write --stdin-commits --append
+------------------------------------------------
+
+* Read basic information from the commit-graph file.
++
+------------------------------------------------
+$ git commit-graph read
+------------------------------------------------
+
+
+GIT
+---
+Part of the linkgit:git[1] suite
diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt
new file mode 100644
index 0000000000..ad6af8105c
--- /dev/null
+++ b/Documentation/technical/commit-graph-format.txt
@@ -0,0 +1,97 @@
+Git commit graph format
+=======================
+
+The Git commit graph stores a list of commit OIDs and some associated
+metadata, including:
+
+- The generation number of the commit. Commits with no parents have
+ generation number 1; commits with parents have generation number
+ one more than the maximum generation number of its parents. We
+ reserve zero as special, and can be used to mark a generation
+ number invalid or as "not computed".
+
+- The root tree OID.
+
+- The commit date.
+
+- The parents of the commit, stored using positional references within
+ the graph file.
+
+These positional references are stored as unsigned 32-bit integers
+corresponding to the array position withing the list of commit OIDs. We
+use the most-significant bit for special purposes, so we can store at most
+(1 << 31) - 1 (around 2 billion) commits.
+
+== Commit graph files have the following format:
+
+In order to allow extensions that add extra data to the graph, we organize
+the body into "chunks" and provide a binary lookup table at the beginning
+of the body. The header includes certain values, such as number of chunks
+and hash type.
+
+All 4-byte numbers are in network order.
+
+HEADER:
+
+ 4-byte signature:
+ The signature is: {'C', 'G', 'P', 'H'}
+
+ 1-byte version number:
+ Currently, the only valid version is 1.
+
+ 1-byte Hash Version (1 = SHA-1)
+ We infer the hash length (H) from this value.
+
+ 1-byte number (C) of "chunks"
+
+ 1-byte (reserved for later use)
+ Current clients should ignore this value.
+
+CHUNK LOOKUP:
+
+ (C + 1) * 12 bytes listing the table of contents for the chunks:
+ First 4 bytes describe the chunk id. Value 0 is a terminating label.
+ Other 8 bytes provide the byte-offset in current file for chunk to
+ start. (Chunks are ordered contiguously in the file, so you can infer
+ the length using the next chunk position if necessary.) Each chunk
+ ID appears at most once.
+
+ The remaining data in the body is described one chunk at a time, and
+ these chunks may be given in any order. Chunks are required unless
+ otherwise specified.
+
+CHUNK DATA:
+
+ OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes)
+ The ith entry, F[i], stores the number of OIDs with first
+ byte at most i. Thus F[255] stores the total
+ number of commits (N).
+
+ OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes)
+ The OIDs for all commits in the graph, sorted in ascending order.
+
+ Commit Data (ID: {'C', 'G', 'E', 'T' }) (N * (H + 16) bytes)
+ * The first H bytes are for the OID of the root tree.
+ * The next 8 bytes are for the positions of the first two parents
+ of the ith commit. Stores value 0xffffffff if no parent in that
+ position. If there are more than two parents, the second value
+ has its most-significant bit on and the other bits store an array
+ position into the Large Edge List chunk.
+ * The next 8 bytes store the generation number of the commit and
+ the commit time in seconds since EPOCH. The generation number
+ uses the higher 30 bits of the first 4 bytes, while the commit
+ time uses the 32 bits of the second 4 bytes, along with the lowest
+ 2 bits of the lowest byte, storing the 33rd and 34th bit of the
+ commit time.
+
+ Large Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional]
+ This list of 4-byte values store the second through nth parents for
+ all octopus merges. The second parent value in the commit data stores
+ an array position within this list along with the most-significant bit
+ on. Starting at that array position, iterate through this list of commit
+ positions for the parents until reaching a value with the most-significant
+ bit on. The other bits correspond to the position of the last parent.
+
+TRAILER:
+
+ H-byte HASH-checksum of all of the above.
diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt
new file mode 100644
index 0000000000..0550c6d0dc
--- /dev/null
+++ b/Documentation/technical/commit-graph.txt
@@ -0,0 +1,163 @@
+Git Commit Graph Design Notes
+=============================
+
+Git walks the commit graph for many reasons, including:
+
+1. Listing and filtering commit history.
+2. Computing merge bases.
+
+These operations can become slow as the commit count grows. The merge
+base calculation shows up in many user-facing commands, such as 'merge-base'
+or 'status' and can take minutes to compute depending on history shape.
+
+There are two main costs here:
+
+1. Decompressing and parsing commits.
+2. Walking the entire graph to satisfy topological order constraints.
+
+The commit graph file is a supplemental data structure that accelerates
+commit graph walks. If a user downgrades or disables the 'core.commitGraph'
+config setting, then the existing ODB is sufficient. The file is stored
+as "commit-graph" either in the .git/objects/info directory or in the info
+directory of an alternate.
+
+The commit graph file stores the commit graph structure along with some
+extra metadata to speed up graph walks. By listing commit OIDs in lexi-
+cographic order, we can identify an integer position for each commit and
+refer to the parents of a commit using those integer positions. We use
+binary search to find initial commits and then use the integer positions
+for fast lookups during the walk.
+
+A consumer may load the following info for a commit from the graph:
+
+1. The commit OID.
+2. The list of parents, along with their integer position.
+3. The commit date.
+4. The root tree OID.
+5. The generation number (see definition below).
+
+Values 1-4 satisfy the requirements of parse_commit_gently().
+
+Define the "generation number" of a commit recursively as follows:
+
+ * A commit with no parents (a root commit) has generation number one.
+
+ * A commit with at least one parent has generation number one more than
+ the largest generation number among its parents.
+
+Equivalently, the generation number of a commit A is one more than the
+length of a longest path from A to a root commit. The recursive definition
+is easier to use for computation and observing the following property:
+
+ If A and B are commits with generation numbers N and M, respectively,
+ and N <= M, then A cannot reach B. That is, we know without searching
+ that B is not an ancestor of A because it is further from a root commit
+ than A.
+
+ Conversely, when checking if A is an ancestor of B, then we only need
+ to walk commits until all commits on the walk boundary have generation
+ number at most N. If we walk commits using a priority queue seeded by
+ generation numbers, then we always expand the boundary commit with highest
+ generation number and can easily detect the stopping condition.
+
+This property can be used to significantly reduce the time it takes to
+walk commits and determine topological relationships. Without generation
+numbers, the general heuristic is the following:
+
+ If A and B are commits with commit time X and Y, respectively, and
+ X < Y, then A _probably_ cannot reach B.
+
+This heuristic is currently used whenever the computation is allowed to
+violate topological relationships due to clock skew (such as "git log"
+with default order), but is not used when the topological order is
+required (such as merge base calculations, "git log --graph").
+
+In practice, we expect some commits to be created recently and not stored
+in the commit graph. We can treat these commits as having "infinite"
+generation number and walk until reaching commits with known generation
+number.
+
+Design Details
+--------------
+
+- The commit graph file is stored in a file named 'commit-graph' in the
+ .git/objects/info directory. This could be stored in the info directory
+ of an alternate.
+
+- The core.commitGraph config setting must be on to consume graph files.
+
+- The file format includes parameters for the object ID hash function,
+ so a future change of hash algorithm does not require a change in format.
+
+Future Work
+-----------
+
+- The commit graph feature currently does not honor commit grafts. This can
+ be remedied by duplicating or refactoring the current graft logic.
+
+- The 'commit-graph' subcommand does not have a "verify" mode that is
+ necessary for integration with fsck.
+
+- The file format includes room for precomputed generation numbers. These
+ are not currently computed, so all generation numbers will be marked as
+ 0 (or "uncomputed"). A later patch will include this calculation.
+
+- After computing and storing generation numbers, we must make graph
+ walks aware of generation numbers to gain the performance benefits they
+ enable. This will mostly be accomplished by swapping a commit-date-ordered
+ priority queue with one ordered by generation number. The following
+ operations are important candidates:
+
+ - paint_down_to_common()
+ - 'log --topo-order'
+
+- Currently, parse_commit_gently() requires filling in the root tree
+ object for a commit. This passes through lookup_tree() and consequently
+ lookup_object(). Also, it calls lookup_commit() when loading the parents.
+ These method calls check the ODB for object existence, even if the
+ consumer does not need the content. For example, we do not need the
+ tree contents when computing merge bases. Now that commit parsing is
+ removed from the computation time, these lookup operations are the
+ slowest operations keeping graph walks from being fast. Consider
+ loading these objects without verifying their existence in the ODB and
+ only loading them fully when consumers need them. Consider a method
+ such as "ensure_tree_loaded(commit)" that fully loads a tree before
+ using commit->tree.
+
+- The current design uses the 'commit-graph' subcommand to generate the graph.
+ When this feature stabilizes enough to recommend to most users, we should
+ add automatic graph writes to common operations that create many commits.
+ For example, one could compute a graph on 'clone', 'fetch', or 'repack'
+ commands.
+
+- A server could provide a commit graph file as part of the network protocol
+ to avoid extra calculations by clients. This feature is only of benefit if
+ the user is willing to trust the file, because verifying the file is correct
+ is as hard as computing it from scratch.
+
+Related Links
+-------------
+[0] https://bugs.chromium.org/p/git/issues/detail?id=8
+ Chromium work item for: Serialized Commit Graph
+
+[1] https://public-inbox.org/git/20110713070517.GC18566@sigill.intra.peff.net/
+ An abandoned patch that introduced generation numbers.
+
+[2] https://public-inbox.org/git/20170908033403.q7e6dj7benasrjes@sigill.intra.peff.net/
+ Discussion about generation numbers on commits and how they interact
+ with fsck.
+
+[3] https://public-inbox.org/git/20170908034739.4op3w4f2ma5s65ku@sigill.intra.peff.net/
+ More discussion about generation numbers and not storing them inside
+ commit objects. A valuable quote:
+
+ "I think we should be moving more in the direction of keeping
+ repo-local caches for optimizations. Reachability bitmaps have been
+ a big performance win. I think we should be doing the same with our
+ properties of commits. Not just generation numbers, but making it
+ cheap to access the graph structure without zlib-inflating whole
+ commit objects (i.e., packv4 or something like the "metapacks" I
+ proposed a few years ago)."
+
+[4] https://public-inbox.org/git/20180108154822.54829-1-git@jeffhostetler.com/T/#u
+ A patch to remove the ahead-behind calculation from 'status'.
diff --git a/Makefile b/Makefile
index 46a215dc89..f89f2dba5b 100644
--- a/Makefile
+++ b/Makefile
@@ -816,6 +816,7 @@ LIB_OBJS += color.o
LIB_OBJS += column.o
LIB_OBJS += combine-diff.o
LIB_OBJS += commit.o
+LIB_OBJS += commit-graph.o
LIB_OBJS += compat/obstack.o
LIB_OBJS += compat/terminal.o
LIB_OBJS += config.o
@@ -995,6 +996,7 @@ BUILTIN_OBJS += builtin/clone.o
BUILTIN_OBJS += builtin/column.o
BUILTIN_OBJS += builtin/commit-tree.o
BUILTIN_OBJS += builtin/commit.o
+BUILTIN_OBJS += builtin/commit-graph.o
BUILTIN_OBJS += builtin/config.o
BUILTIN_OBJS += builtin/count-objects.o
BUILTIN_OBJS += builtin/credential.o
diff --git a/alloc.c b/alloc.c
index 12afadfacd..cf4f8b61e1 100644
--- a/alloc.c
+++ b/alloc.c
@@ -93,6 +93,7 @@ void *alloc_commit_node(void)
struct commit *c = alloc_node(&commit_state, sizeof(struct commit));
c->object.type = OBJ_COMMIT;
c->index = alloc_commit_index();
+ c->graph_pos = COMMIT_NOT_FROM_GRAPH;
return c;
}
diff --git a/builtin.h b/builtin.h
index 3f3fdfc281..4e0f64723e 100644
--- a/builtin.h
+++ b/builtin.h
@@ -149,6 +149,7 @@ extern int cmd_clone(int argc, const char **argv, const char *prefix);
extern int cmd_clean(int argc, const char **argv, const char *prefix);
extern int cmd_column(int argc, const char **argv, const char *prefix);
extern int cmd_commit(int argc, const char **argv, const char *prefix);
+extern int cmd_commit_graph(int argc, const char **argv, const char *prefix);
extern int cmd_commit_tree(int argc, const char **argv, const char *prefix);
extern int cmd_config(int argc, const char **argv, const char *prefix);
extern int cmd_count_objects(int argc, const char **argv, const char *prefix);
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
new file mode 100644
index 0000000000..37420ae0fd
--- /dev/null
+++ b/builtin/commit-graph.c
@@ -0,0 +1,171 @@
+#include "builtin.h"
+#include "config.h"
+#include "dir.h"
+#include "lockfile.h"
+#include "parse-options.h"
+#include "commit-graph.h"
+
+static char const * const builtin_commit_graph_usage[] = {
+ N_("git commit-graph [--object-dir <objdir>]"),
+ N_("git commit-graph read [--object-dir <objdir>]"),
+ N_("git commit-graph write [--object-dir <objdir>] [--append] [--stdin-packs|--stdin-commits]"),
+ NULL
+};
+
+static const char * const builtin_commit_graph_read_usage[] = {
+ N_("git commit-graph read [--object-dir <objdir>]"),
+ NULL
+};
+
+static const char * const builtin_commit_graph_write_usage[] = {
+ N_("git commit-graph write [--object-dir <objdir>] [--append] [--stdin-packs|--stdin-commits]"),
+ NULL
+};
+
+static struct opts_commit_graph {
+ const char *obj_dir;
+ int stdin_packs;
+ int stdin_commits;
+ int append;
+} opts;
+
+static int graph_read(int argc, const char **argv)
+{
+ struct commit_graph *graph = NULL;
+ char *graph_name;
+
+ static struct option builtin_commit_graph_read_options[] = {
+ OPT_STRING(0, "object-dir", &opts.obj_dir,
+ N_("dir"),
+ N_("The object directory to store the graph")),
+ OPT_END(),
+ };
+
+ argc = parse_options(argc, argv, NULL,
+ builtin_commit_graph_read_options,
+ builtin_commit_graph_read_usage, 0);
+
+ if (!opts.obj_dir)
+ opts.obj_dir = get_object_directory();
+
+ graph_name = get_commit_graph_filename(opts.obj_dir);
+ graph = load_commit_graph_one(graph_name);
+
+ if (!graph)
+ die("graph file %s does not exist", graph_name);
+ FREE_AND_NULL(graph_name);
+
+ printf("header: %08x %d %d %d %d\n",
+ ntohl(*(uint32_t*)graph->data),
+ *(unsigned char*)(graph->data + 4),
+ *(unsigned char*)(graph->data + 5),
+ *(unsigned char*)(graph->data + 6),
+ *(unsigned char*)(graph->data + 7));
+ printf("num_commits: %u\n", graph->num_commits);
+ printf("chunks:");
+
+ if (graph->chunk_oid_fanout)
+ printf(" oid_fanout");
+ if (graph->chunk_oid_lookup)
+ printf(" oid_lookup");
+ if (graph->chunk_commit_data)
+ printf(" commit_metadata");
+ if (graph->chunk_large_edges)
+ printf(" large_edges");
+ printf("\n");
+
+ return 0;
+}
+
+static int graph_write(int argc, const char **argv)
+{
+ const char **pack_indexes = NULL;
+ int packs_nr = 0;
+ const char **commit_hex = NULL;
+ int commits_nr = 0;
+ const char **lines = NULL;
+ int lines_nr = 0;
+ int lines_alloc = 0;
+
+ static struct option builtin_commit_graph_write_options[] = {
+ OPT_STRING(0, "object-dir", &opts.obj_dir,
+ N_("dir"),
+ N_("The object directory to store the graph")),
+ OPT_BOOL(0, "stdin-packs", &opts.stdin_packs,
+ N_("scan pack-indexes listed by stdin for commits")),
+ OPT_BOOL(0, "stdin-commits", &opts.stdin_commits,
+ N_("start walk at commits listed by stdin")),
+ OPT_BOOL(0, "append", &opts.append,
+ N_("include all commits already in the commit-graph file")),
+ OPT_END(),
+ };
+
+ argc = parse_options(argc, argv, NULL,
+ builtin_commit_graph_write_options,
+ builtin_commit_graph_write_usage, 0);
+
+ if (opts.stdin_packs && opts.stdin_commits)
+ die(_("cannot use both --stdin-commits and --stdin-packs"));
+ if (!opts.obj_dir)
+ opts.obj_dir = get_object_directory();
+
+ if (opts.stdin_packs || opts.stdin_commits) {
+ struct strbuf buf = STRBUF_INIT;
+ lines_nr = 0;
+ lines_alloc = 128;
+ ALLOC_ARRAY(lines, lines_alloc);
+
+ while (strbuf_getline(&buf, stdin) != EOF) {
+ ALLOC_GROW(lines, lines_nr + 1, lines_alloc);
+ lines[lines_nr++] = strbuf_detach(&buf, NULL);
+ }
+
+ if (opts.stdin_packs) {
+ pack_indexes = lines;
+ packs_nr = lines_nr;
+ }
+ if (opts.stdin_commits) {
+ commit_hex = lines;
+ commits_nr = lines_nr;
+ }
+ }
+
+ write_commit_graph(opts.obj_dir,
+ pack_indexes,
+ packs_nr,
+ commit_hex,
+ commits_nr,
+ opts.append);
+
+ return 0;
+}
+
+int cmd_commit_graph(int argc, const char **argv, const char *prefix)
+{
+ static struct option builtin_commit_graph_options[] = {
+ OPT_STRING(0, "object-dir", &opts.obj_dir,
+ N_("dir"),
+ N_("The object directory to store the graph")),
+ OPT_END(),
+ };
+
+ if (argc == 2 && !strcmp(argv[1], "-h"))
+ usage_with_options(builtin_commit_graph_usage,
+ builtin_commit_graph_options);
+
+ git_config(git_default_config, NULL);
+ argc = parse_options(argc, argv, prefix,
+ builtin_commit_graph_options,
+ builtin_commit_graph_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (argc > 0) {
+ if (!strcmp(argv[0], "read"))
+ return graph_read(argc, argv);
+ if (!strcmp(argv[0], "write"))
+ return graph_write(argc, argv);
+ }
+
+ usage_with_options(builtin_commit_graph_usage,
+ builtin_commit_graph_options);
+}
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 78e66b9986..a2cd29d8f4 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1271,7 +1271,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
nr_objects - nr_objects_initial);
stop_progress_msg(&progress, msg.buf);
strbuf_release(&msg);
- hashclose(f, tail_hash, 0);
+ finalize_hashfile(f, tail_hash, 0);
hashcpy(read_hash, pack_hash);
fixup_pack_header_footer(output_fd, pack_hash,
curr_pack, nr_objects,
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4bdae5a1d8..24b1c6c5dd 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -837,11 +837,11 @@ static void write_pack_file(void)
* If so, rewrite it like in fast-import
*/
if (pack_to_stdout) {
- hashclose(f, oid.hash, CSUM_CLOSE);
+ finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
} else if (nr_written == nr_remaining) {
- hashclose(f, oid.hash, CSUM_FSYNC);
+ finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
- int fd = hashclose(f, oid.hash, 0);
+ int fd = finalize_hashfile(f, oid.hash, 0);
fixup_pack_header_footer(fd, oid.hash, pack_tmp_name,
nr_written, oid.hash, offset);
close(fd);
diff --git a/bulk-checkin.c b/bulk-checkin.c
index de1f4040c7..c0bc8de107 100644
--- a/bulk-checkin.c
+++ b/bulk-checkin.c
@@ -36,9 +36,9 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
unlink(state->pack_tmp_name);
goto clear_exit;
} else if (state->nr_written == 1) {
- hashclose(state->f, oid.hash, CSUM_FSYNC);
+ finalize_hashfile(state->f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
- int fd = hashclose(state->f, oid.hash, 0);
+ int fd = finalize_hashfile(state->f, oid.hash, 0);
fixup_pack_header_footer(fd, oid.hash, state->pack_tmp_name,
state->nr_written, oid.hash,
state->offset);
diff --git a/cache.h b/cache.h
index 6f14cfbb6f..601a49c79f 100644
--- a/cache.h
+++ b/cache.h
@@ -806,6 +806,7 @@ extern char *git_replace_ref_base;
extern int fsync_object_files;
extern int core_preload_index;
+extern int core_commit_graph;
extern int core_apply_sparse_checkout;
extern int precomposed_unicode;
extern int protect_hfs;
diff --git a/command-list.txt b/command-list.txt
index a1fad28fd8..835c5890be 100644
--- a/command-list.txt
+++ b/command-list.txt
@@ -34,6 +34,7 @@ git-clean mainporcelain
git-clone mainporcelain init
git-column purehelpers
git-commit mainporcelain history
+git-commit-graph plumbingmanipulators
git-commit-tree plumbingmanipulators
git-config ancillarymanipulators
git-count-objects ancillaryinterrogators
diff --git a/commit-graph.c b/commit-graph.c
new file mode 100644
index 0000000000..3fc1e0da27
--- /dev/null
+++ b/commit-graph.c
@@ -0,0 +1,741 @@
+#include "cache.h"
+#include "config.h"
+#include "git-compat-util.h"
+#include "lockfile.h"
+#include "pack.h"
+#include "packfile.h"
+#include "commit.h"
+#include "object.h"
+#include "revision.h"
+#include "sha1-lookup.h"
+#include "commit-graph.h"
+#include "object-store.h"
+
+#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
+#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
+#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
+#define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */
+#define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */
+
+#define GRAPH_DATA_WIDTH 36
+
+#define GRAPH_VERSION_1 0x1
+#define GRAPH_VERSION GRAPH_VERSION_1
+
+#define GRAPH_OID_VERSION_SHA1 1
+#define GRAPH_OID_LEN_SHA1 GIT_SHA1_RAWSZ
+#define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1
+#define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1
+
+#define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000
+#define GRAPH_PARENT_MISSING 0x7fffffff
+#define GRAPH_EDGE_LAST_MASK 0x7fffffff
+#define GRAPH_PARENT_NONE 0x70000000
+
+#define GRAPH_LAST_EDGE 0x80000000
+
+#define GRAPH_FANOUT_SIZE (4 * 256)
+#define GRAPH_CHUNKLOOKUP_WIDTH 12
+#define GRAPH_MIN_SIZE (5 * GRAPH_CHUNKLOOKUP_WIDTH + GRAPH_FANOUT_SIZE + \
+ GRAPH_OID_LEN + 8)
+
+char *get_commit_graph_filename(const char *obj_dir)
+{
+ return xstrfmt("%s/info/commit-graph", obj_dir);
+}
+
+static struct commit_graph *alloc_commit_graph(void)
+{
+ struct commit_graph *g = xcalloc(1, sizeof(*g));
+ g->graph_fd = -1;
+
+ return g;
+}
+
+struct commit_graph *load_commit_graph_one(const char *graph_file)
+{
+ void *graph_map;
+ const unsigned char *data, *chunk_lookup;
+ size_t graph_size;
+ struct stat st;
+ uint32_t i;
+ struct commit_graph *graph;
+ int fd = git_open(graph_file);
+ uint64_t last_chunk_offset;
+ uint32_t last_chunk_id;
+ uint32_t graph_signature;
+ unsigned char graph_version, hash_version;
+
+ if (fd < 0)
+ return NULL;
+ if (fstat(fd, &st)) {
+ close(fd);
+ return NULL;
+ }
+ graph_size = xsize_t(st.st_size);
+
+ if (graph_size < GRAPH_MIN_SIZE) {
+ close(fd);
+ die("graph