summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/technical/commit-graph.txt80
-rw-r--r--commit-graph.c180
-rwxr-xr-xt/t5324-split-commit-graph.sh13
3 files changed, 240 insertions, 33 deletions
diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt
index 1dca3bd8fe..d9c6253b0a 100644
--- a/Documentation/technical/commit-graph.txt
+++ b/Documentation/technical/commit-graph.txt
@@ -186,6 +186,86 @@ positions to refer to their parents, which may be in `graph-{hash1}.graph` or
its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 +
X2).
+Each commit-graph file (except the base, `graph-{hash0}.graph`) contains data
+specifying the hashes of all files in the lower layers. In the above example,
+`graph-{hash1}.graph` contains `{hash0}` while `graph-{hash2}.graph` contains
+`{hash0}` and `{hash1}`.
+
+## Merging commit-graph files
+
+If we only added a new commit-graph file on every write, we would run into a
+linear search problem through many commit-graph files. Instead, we use a merge
+strategy to decide when the stack should collapse some number of levels.
+
+The diagram below shows such a collapse. As a set of new commits are added, it
+is determined by the merge strategy that the files should collapse to
+`graph-{hash1}`. Thus, the new commits, the commits in `graph-{hash2}` and
+the commits in `graph-{hash1}` should be combined into a new `graph-{hash3}`
+file.
+
+ +---------------------+
+ | |
+ | (new commits) |
+ | |
+ +---------------------+
+ | |
+ +-----------------------+ +---------------------+
+ | graph-{hash2} |->| |
+ +-----------------------+ +---------------------+
+ | | |
+ +-----------------------+ +---------------------+
+ | | | |
+ | graph-{hash1} |->| |
+ | | | |
+ +-----------------------+ +---------------------+
+ | tmp_graphXXX
+ +-----------------------+
+ | |
+ | |
+ | |
+ | graph-{hash0} |
+ | |
+ | |
+ | |
+ +-----------------------+
+
+During this process, the commits to write are combined, sorted and we write the
+contents to a temporary file, all while holding a `commit-graph-chain.lock`
+lock-file. When the file is flushed, we rename it to `graph-{hash3}`
+according to the computed `{hash3}`. Finally, we write the new chain data to
+`commit-graph-chain.lock`:
+
+```
+ {hash3}
+ {hash0}
+```
+
+We then close the lock-file.
+
+## Merge Strategy
+
+When writing a set of commits that do not exist in the commit-graph stack of
+height N, we default to creating a new file at level N + 1. We then decide to
+merge with the Nth level if one of two conditions hold:
+
+ 1. The expected file size for level N + 1 is at least half the file size for
+ level N.
+
+ 2. Level N + 1 contains more than 64,0000 commits.
+
+This decision cascades down the levels: when we merge a level we create a new
+set of commits that then compares to the next level.
+
+The first condition bounds the number of levels to be logarithmic in the total
+number of commits. The second condition bounds the total number of commits in
+a `graph-{hashN}` file and not in the `commit-graph` file, preventing
+significant performance issues when the stack merges and another process only
+partially reads the previous stack.
+
+The merge strategy values (2 for the size multiple, 64,000 for the maximum
+number of commits) could be extracted into config settings for full
+flexibility.
+
Related Links
-------------
[0] https://bugs.chromium.org/p/git/issues/detail?id=8
diff --git a/commit-graph.c b/commit-graph.c
index 1224309e5f..fb3100921c 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -1298,36 +1298,6 @@ static int write_graph_chunk_base(struct hashfile *f,
return 0;
}
-static void init_commit_graph_chain(struct write_commit_graph_context *ctx)
-{
- struct commit_graph *g = ctx->r->objects->commit_graph;
- uint32_t i;
-
- ctx->new_base_graph = g;
- ctx->base_graph_name = xstrdup(g->filename);
- ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
-
- ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
-
- ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
- ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
-
- for (i = 0; i < ctx->num_commit_graphs_before - 1; i++)
- ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
-
- if (ctx->num_commit_graphs_before)
- ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] =
- get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid));
-
- i = ctx->num_commit_graphs_before - 1;
-
- while (g) {
- ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
- i--;
- g = g->base_graph;
- }
-}
-
static int write_commit_graph_file(struct write_commit_graph_context *ctx)
{
uint32_t i;
@@ -1509,6 +1479,145 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
return 0;
}
+static int split_strategy_max_commits = 64000;
+static float split_strategy_size_mult = 2.0f;
+
+static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
+{
+ struct commit_graph *g = ctx->r->objects->commit_graph;
+ uint32_t num_commits = ctx->commits.nr;
+ uint32_t i;
+
+ g = ctx->r->objects->commit_graph;
+ ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1;
+
+ while (g && (g->num_commits <= split_strategy_size_mult * num_commits ||
+ num_commits > split_strategy_max_commits)) {
+ num_commits += g->num_commits;
+ g = g->base_graph;
+
+ ctx->num_commit_graphs_after--;
+ }
+
+ ctx->new_base_graph = g;
+
+ ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after);
+ ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after);
+
+ for (i = 0; i < ctx->num_commit_graphs_after &&
+ i < ctx->num_commit_graphs_before; i++)
+ ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]);
+
+ i = ctx->num_commit_graphs_before - 1;
+ g = ctx->r->objects->commit_graph;
+
+ while (g) {
+ if (i < ctx->num_commit_graphs_after)
+ ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid));
+
+ i--;
+ g = g->base_graph;
+ }
+}
+
+static void merge_commit_graph(struct write_commit_graph_context *ctx,
+ struct commit_graph *g)
+{
+ uint32_t i;
+ uint32_t offset = g->num_commits_in_base;
+
+ ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc);
+
+ for (i = 0; i < g->num_commits; i++) {
+ struct object_id oid;
+ struct commit *result;
+
+ display_progress(ctx->progress, i + 1);
+
+ load_oid_from_graph(g, i + offset, &oid);
+
+ /* only add commits if they still exist in the repo */
+ result = lookup_commit_reference_gently(ctx->r, &oid, 1);
+
+ if (result) {
+ ctx->commits.list[ctx->commits.nr] = result;
+ ctx->commits.nr++;
+ }
+ }
+}
+
+static int commit_compare(const void *_a, const void *_b)
+{
+ const struct commit *a = *(const struct commit **)_a;
+ const struct commit *b = *(const struct commit **)_b;
+ return oidcmp(&a->object.oid, &b->object.oid);
+}
+
+static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx)
+{
+ uint32_t i, num_parents;
+ struct commit_list *parent;
+
+ if (ctx->report_progress)
+ ctx->progress = start_delayed_progress(
+ _("Scanning merged commits"),
+ ctx->commits.nr);
+
+ QSORT(ctx->commits.list, ctx->commits.nr, commit_compare);
+
+ ctx->num_extra_edges = 0;
+ for (i = 0; i < ctx->commits.nr; i++) {
+ display_progress(ctx->progress, i);
+
+ if (i && oideq(&ctx->commits.list[i - 1]->object.oid,
+ &ctx->commits.list[i]->object.oid)) {
+ die(_("unexpected duplicate commit id %s"),
+ oid_to_hex(&ctx->commits.list[i]->object.oid));
+ } else {
+ num_parents = 0;
+ for (parent = ctx->commits.list[i]->parents; parent; parent = parent->next)
+ num_parents++;
+
+ if (num_parents > 2)
+ ctx->num_extra_edges += num_parents - 2;
+ }
+ }
+
+ stop_progress(&ctx->progress);
+}
+
+static void merge_commit_graphs(struct write_commit_graph_context *ctx)
+{
+ struct commit_graph *g = ctx->r->objects->commit_graph;
+ uint32_t current_graph_number = ctx->num_commit_graphs_before;
+ struct strbuf progress_title = STRBUF_INIT;
+
+ while (g && current_graph_number >= ctx->num_commit_graphs_after) {
+ current_graph_number--;
+
+ if (ctx->report_progress) {
+ strbuf_addstr(&progress_title, _("Merging commit-graph"));
+ ctx->progress = start_delayed_progress(progress_title.buf, 0);
+ }
+
+ merge_commit_graph(ctx, g);
+ stop_progress(&ctx->progress);
+ strbuf_release(&progress_title);
+
+ g = g->base_graph;
+ }
+
+ if (g) {
+ ctx->new_base_graph = g;
+ ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base;
+ }
+
+ if (ctx->new_base_graph)
+ ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename);
+
+ sort_and_scan_merged_commits(ctx);
+}
+
int write_commit_graph(const char *obj_dir,
struct string_list *pack_indexes,
struct string_list *commit_hex,
@@ -1554,6 +1663,9 @@ int write_commit_graph(const char *obj_dir,
ctx->approx_nr_objects = approximate_object_count();
ctx->oids.alloc = ctx->approx_nr_objects / 32;
+ if (ctx->split && ctx->oids.alloc > split_strategy_max_commits)
+ ctx->oids.alloc = split_strategy_max_commits;
+
if (ctx->append) {
prepare_commit_graph_one(ctx->r, ctx->obj_dir);
if (ctx->r->objects->commit_graph)
@@ -1607,9 +1719,11 @@ int write_commit_graph(const char *obj_dir,
if (!ctx->commits.nr)
goto cleanup;
- if (ctx->split)
- init_commit_graph_chain(ctx);
- else
+ if (ctx->split) {
+ split_graph_merge_strategy(ctx);
+
+ merge_commit_graphs(ctx);
+ } else
ctx->num_commit_graphs_after = 1;
compute_generation_numbers(ctx);
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index ccd24bd22b..5cb5663a30 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -119,4 +119,17 @@ test_expect_success 'add one commit, write a tip graph' '
graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6
+test_expect_success 'add one commit, write a merged graph' '
+ test_commit 12 &&
+ git branch commits/12 &&
+ git commit-graph write --reachable --split &&
+ test_path_is_file $graphdir/commit-graph-chain &&
+ test_line_count = 2 $graphdir/commit-graph-chain &&
+ ls $graphdir/graph-*.graph >graph-files &&
+ test_line_count = 4 graph-files &&
+ verify_chain_files_exist $graphdir
+'
+
+graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6
+
test_done