summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/config.txt2
-rw-r--r--Documentation/config/commitgraph.txt8
-rw-r--r--Documentation/git-commit-graph.txt7
-rw-r--r--Documentation/technical/commit-graph-format.txt2
-rw-r--r--blame.c8
-rw-r--r--bloom.c59
-rw-r--r--bloom.h29
-rw-r--r--builtin/commit-graph.c63
-rw-r--r--commit-graph.c141
-rw-r--r--commit-graph.h17
-rw-r--r--diff.h2
-rw-r--r--fuzz-commit-graph.c5
-rw-r--r--line-log.c2
-rw-r--r--repo-settings.c3
-rw-r--r--repository.h1
-rw-r--r--revision.c7
-rw-r--r--t/helper/test-bloom.c4
-rw-r--r--t/helper/test-read-graph.c3
-rwxr-xr-xt/t0095-bloom.sh8
-rwxr-xr-xt/t4216-log-bloom.sh242
-rwxr-xr-xt/t5324-split-commit-graph.sh13
-rw-r--r--tree-diff.c5
22 files changed, 508 insertions, 123 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt
index f93b6837e4..bf706b950e 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -340,6 +340,8 @@ include::config/column.txt[]
include::config/commit.txt[]
+include::config/commitgraph.txt[]
+
include::config/credential.txt[]
include::config/completion.txt[]
diff --git a/Documentation/config/commitgraph.txt b/Documentation/config/commitgraph.txt
new file mode 100644
index 0000000000..4582c39fc4
--- /dev/null
+++ b/Documentation/config/commitgraph.txt
@@ -0,0 +1,8 @@
+commitGraph.maxNewFilters::
+ Specifies the default value for the `--max-new-filters` option of `git
+ commit-graph write` (c.f., linkgit:git-commit-graph[1]).
+
+commitGraph.readChangedPaths::
+ If true, then git will use the changed-path Bloom filters in the
+ commit-graph file (if it exists, and they are present). Defaults to
+ true. See linkgit:git-commit-graph[1] for more information.
diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 17405c73a9..de6b6de230 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -67,6 +67,13 @@ this option is given, future commit-graph writes will automatically assume
that this option was intended. Use `--no-changed-paths` to stop storing this
data.
+
+With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
+filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
+enforced. Only commits present in the new layer count against this
+limit. To retroactively compute Bloom filters over earlier layers, it is
+advised to use `--split=replace`. Overrides the `commitGraph.maxNewFilters`
+configuration.
++
With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in
`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt
index 1f86031333..b3b58880b9 100644
--- a/Documentation/technical/commit-graph-format.txt
+++ b/Documentation/technical/commit-graph-format.txt
@@ -125,7 +125,7 @@ CHUNK DATA:
* The rest of the chunk is the concatenation of all the computed Bloom
filters for the commits in lexicographic order.
* Note: Commits with no changes or more than 512 changes have Bloom filters
- of length zero.
+ of length one, with either all bits set to zero or one respectively.
* The BDAT chunk is present if and only if BIDX is present.
Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional]
diff --git a/blame.c b/blame.c
index b475bfa1c0..686845b2b4 100644
--- a/blame.c
+++ b/blame.c
@@ -1276,7 +1276,7 @@ static int maybe_changed_path(struct repository *r,
if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY)
return 1;
- filter = get_bloom_filter(r, origin->commit, 0);
+ filter = get_bloom_filter(r, origin->commit);
if (!filter)
return 1;
@@ -2892,16 +2892,18 @@ void setup_blame_bloom_data(struct blame_scoreboard *sb,
const char *path)
{
struct blame_bloom_data *bd;
+ struct bloom_filter_settings *bs;
if (!sb->repo->objects->commit_graph)
return;
- if (!sb->repo->objects->commit_graph->bloom_filter_settings)
+ bs = get_bloom_filter_settings(sb->repo);
+ if (!bs)
return;
bd = xmalloc(sizeof(struct blame_bloom_data));
- bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings;
+ bd->settings = bs;
bd->alloc = 4;
bd->nr = 0;
diff --git a/bloom.c b/bloom.c
index 1a573226e7..68c73200a5 100644
--- a/bloom.c
+++ b/bloom.c
@@ -38,7 +38,7 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
while (graph_pos < g->num_commits_in_base)
g = g->base_graph;
- /* The commit graph commit 'c' lives in doesn't carry bloom filters. */
+ /* The commit graph commit 'c' lives in doesn't carry Bloom filters. */
if (!g->chunk_bloom_indexes)
return 0;
@@ -177,15 +177,25 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data,
return strcmp(e1->path, e2->path);
}
-struct bloom_filter *get_bloom_filter(struct repository *r,
- struct commit *c,
- int compute_if_not_present)
+static void init_truncated_large_filter(struct bloom_filter *filter)
+{
+ filter->data = xmalloc(1);
+ filter->data[0] = 0xFF;
+ filter->len = 1;
+}
+
+struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
+ struct commit *c,
+ int compute_if_not_present,
+ const struct bloom_filter_settings *settings,
+ enum bloom_filter_computed *computed)
{
struct bloom_filter *filter;
- struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
int i;
struct diff_options diffopt;
- int max_changes = 512;
+
+ if (computed)
+ *computed = BLOOM_NOT_COMPUTED;
if (!bloom_filters.slab_size)
return NULL;
@@ -194,12 +204,11 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
if (!filter->data) {
load_commit_graph_info(r, c);
- if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
- r->objects->commit_graph->chunk_bloom_indexes)
+ if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
}
- if (filter->data)
+ if (filter->data && filter->len)
return filter;
if (!compute_if_not_present)
return NULL;
@@ -207,7 +216,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
repo_diff_setup(r, &diffopt);
diffopt.flags.recursive = 1;
diffopt.detect_rename = 0;
- diffopt.max_changes = max_changes;
+ diffopt.max_changes = settings->max_changed_paths;
diff_setup_done(&diffopt);
/* ensure commit is parsed so we have parent information */
@@ -219,7 +228,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
diffcore_std(&diffopt);
- if (diffopt.num_changes <= max_changes) {
+ if (diff_queued_diff.nr <= settings->max_changed_paths) {
struct hashmap pathmap;
struct pathmap_hash_entry *e;
struct hashmap_iter iter;
@@ -256,23 +265,41 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
diff_free_filepair(diff_queued_diff.queue[i]);
}
- filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+ if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
+ init_truncated_large_filter(filter);
+ if (computed)
+ *computed |= BLOOM_TRUNC_LARGE;
+ goto cleanup;
+ }
+
+ filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+ if (!filter->len) {
+ if (computed)
+ *computed |= BLOOM_TRUNC_EMPTY;
+ filter->len = 1;
+ }
filter->data = xcalloc(filter->len, sizeof(unsigned char));
hashmap_for_each_entry(&pathmap, &iter, e, entry) {
struct bloom_key key;
- fill_bloom_key(e->path, strlen(e->path), &key, &settings);
- add_key_to_filter(&key, filter, &settings);
+ fill_bloom_key(e->path, strlen(e->path), &key, settings);
+ add_key_to_filter(&key, filter, settings);
}
+ cleanup:
hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry);
} else {
for (i = 0; i < diff_queued_diff.nr; i++)
diff_free_filepair(diff_queued_diff.queue[i]);
- filter->data = NULL;
- filter->len = 0;
+ init_truncated_large_filter(filter);
+
+ if (computed)
+ *computed |= BLOOM_TRUNC_LARGE;
}
+ if (computed)
+ *computed |= BLOOM_COMPUTED;
+
free(diff_queued_diff.queue);
DIFF_QUEUE_CLEAR(&diff_queued_diff);
diff --git a/bloom.h b/bloom.h
index d8fbb0fbf1..adde6dfe21 100644
--- a/bloom.h
+++ b/bloom.h
@@ -28,9 +28,18 @@ struct bloom_filter_settings {
* that contain n*b bits.
*/
uint32_t bits_per_entry;
+
+ /*
+ * The maximum number of changed paths per commit
+ * before declaring a Bloom filter to be too-large.
+ *
+ * Not written to the commit-graph file.
+ */
+ uint32_t max_changed_paths;
};
-#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
+#define DEFAULT_BLOOM_MAX_CHANGES 512
+#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }
#define BITS_PER_WORD 8
#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
@@ -80,9 +89,21 @@ void add_key_to_filter(const struct bloom_key *key,
void init_bloom_filters(void);
-struct bloom_filter *get_bloom_filter(struct repository *r,
- struct commit *c,
- int compute_if_not_present);
+enum bloom_filter_computed {
+ BLOOM_NOT_COMPUTED = (1 << 0),
+ BLOOM_COMPUTED = (1 << 1),
+ BLOOM_TRUNC_LARGE = (1 << 2),
+ BLOOM_TRUNC_EMPTY = (1 << 3),
+};
+
+struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
+ struct commit *c,
+ int compute_if_not_present,
+ const struct bloom_filter_settings *settings,
+ enum bloom_filter_computed *computed);
+
+#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
+ (r), (c), 0, NULL, NULL)
int bloom_filter_contains(const struct bloom_filter *filter,
const struct bloom_key *key,
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 523501f217..988445abdf 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
- "[--changed-paths] [--[no-]progress] <split options>"),
+ "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+ "<split options>"),
NULL
};
@@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = {
static const char * const builtin_commit_graph_write_usage[] = {
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
- "[--changed-paths] [--[no-]progress] <split options>"),
+ "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+ "<split options>"),
NULL
};
@@ -106,7 +108,7 @@ static int graph_verify(int argc, const char **argv)
FREE_AND_NULL(graph_name);
if (open_ok)
- graph = load_commit_graph_one_fd_st(fd, &st, odb);
+ graph = load_commit_graph_one_fd_st(the_repository, fd, &st, odb);
else
graph = read_commit_graph_one(the_repository, odb);
@@ -119,7 +121,7 @@ static int graph_verify(int argc, const char **argv)
}
extern int read_replace_refs;
-static struct split_commit_graph_opts split_opts;
+static struct commit_graph_opts write_opts;
static int write_option_parse_split(const struct option *opt, const char *arg,
int unset)
@@ -162,6 +164,35 @@ static int read_one_commit(struct oidset *commits, struct progress *progress,
return 0;
}
+static int write_option_max_new_filters(const struct option *opt,
+ const char *arg,
+ int unset)
+{
+ int *to = opt->value;
+ if (unset)
+ *to = -1;
+ else {
+ const char *s;
+ *to = strtol(arg, (char **)&s, 10);
+ if (*s)
+ return error(_("%s expects a numerical value"),
+ optname(opt, opt->flags));
+ }
+ return 0;
+}
+
+static int git_commit_graph_write_config(const char *var, const char *value,
+ void *cb)
+{
+ if (!strcmp(var, "commitgraph.maxnewfilters"))
+ write_opts.max_new_filters = git_config_int(var, value);
+ /*
+ * No need to fall-back to 'git_default_config', since this was already
+ * called in 'cmd_commit_graph()'.
+ */
+ return 0;
+}
+
static int graph_write(int argc, const char **argv)
{
struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@@ -187,27 +218,33 @@ static int graph_write(int argc, const char **argv)
OPT_BOOL(0, "changed-paths", &opts.enable_changed_paths,
N_("enable computation for changed paths")),
OPT_BOOL(0, "progress", &opts.progress, N_("force progress reporting")),
- OPT_CALLBACK_F(0, "split", &split_opts.flags, NULL,
+ OPT_CALLBACK_F(0, "split", &write_opts.split_flags, NULL,
N_("allow writing an incremental commit-graph file"),
PARSE_OPT_OPTARG | PARSE_OPT_NONEG,
write_option_parse_split),
- OPT_INTEGER(0, "max-commits", &split_opts.max_commits,
+ OPT_INTEGER(0, "max-commits", &write_opts.max_commits,
N_("maximum number of commits in a non-base split commit-graph")),
- OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple,
+ OPT_INTEGER(0, "size-multiple", &write_opts.size_multiple,
N_("maximum ratio between two levels of a split commit-graph")),
- OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time,
+ OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
N_("only expire files older than a given date-time")),
+ OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
+ NULL, N_("maximum number of changed-path Bloom filters to compute"),
+ 0, write_option_max_new_filters),
OPT_END(),
};
opts.progress = isatty(2);
opts.enable_changed_paths = -1;
- split_opts.size_multiple = 2;
- split_opts.max_commits = 0;
- split_opts.expire_time = 0;
+ write_opts.size_multiple = 2;
+ write_opts.max_commits = 0;
+ write_opts.expire_time = 0;
+ write_opts.max_new_filters = -1;
trace2_cmd_mode("write");
+ git_config(git_commit_graph_write_config, &opts);
+
argc = parse_options(argc, argv, NULL,
builtin_commit_graph_write_options,
builtin_commit_graph_write_usage, 0);
@@ -232,7 +269,7 @@ static int graph_write(int argc, const char **argv)
odb = find_odb(the_repository, opts.obj_dir);
if (opts.reachable) {
- if (write_commit_graph_reachable(odb, flags, &split_opts))
+ if (write_commit_graph_reachable(odb, flags, &write_opts))
return 1;
return 0;
}
@@ -261,7 +298,7 @@ static int graph_write(int argc, const char **argv)
opts.stdin_packs ? &pack_indexes : NULL,
opts.stdin_commits ? &commits : NULL,
flags,
- &split_opts))
+ &write_opts))
result = 1;
cleanup:
diff --git a/commit-graph.c b/commit-graph.c
index 695c1cf7a3..cb042bdba8 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -231,7 +231,8 @@ int open_commit_graph(const char *graph_file, int *fd, struct stat *st)
return 1;
}
-struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
+struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
+ int fd, struct stat *st,
struct object_directory *odb)
{
void *graph_map;
@@ -247,7 +248,7 @@ struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
}
graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
- ret = parse_commit_graph(graph_map, graph_size);
+ ret = parse_commit_graph(r, graph_map, graph_size);
if (ret)
ret->odb = odb;
@@ -287,7 +288,8 @@ static int verify_commit_graph_lite(struct commit_graph *g)
return 0;
}
-struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
+struct commit_graph *parse_commit_graph(struct repository *r,
+ void *graph_map, size_t graph_size)
{
const unsigned char *data, *chunk_lookup;
uint32_t i;
@@ -325,6 +327,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
return NULL;
}
+ prepare_repo_settings(r);
+
graph = alloc_commit_graph();
graph->hash_len = the_hash_algo->rawsz;
@@ -401,14 +405,14 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
case GRAPH_CHUNKID_BLOOMINDEXES:
if (graph->chunk_bloom_indexes)
chunk_repeated = 1;
- else
+ else if (r->settings.commit_graph_read_changed_paths)
graph->chunk_bloom_indexes = data + chunk_offset;
break;
case GRAPH_CHUNKID_BLOOMDATA:
if (graph->chunk_bloom_data)
chunk_repeated = 1;
- else {
+ else if (r->settings.commit_graph_read_changed_paths) {
uint32_t hash_version;
graph->chunk_bloom_data = data + chunk_offset;
hash_version = get_be32(data + chunk_offset);
@@ -420,6 +424,7 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size)
graph->bloom_filter_settings->hash_version = hash_version;
graph->bloom_filter_settings->num_hashes = get_be32(data + chunk_offset + 4);
graph->bloom_filter_settings->bits_per_entry = get_be32(data + chunk_offset + 8);
+ graph->bloom_filter_settings->max_changed_paths = DEFAULT_BLOOM_MAX_CHANGES;
}
break;
}
@@ -452,7 +457,8 @@ free_and_return:
return NULL;
}
-static struct commit_graph *load_commit_graph_one(const char *graph_file,
+static struct commit_graph *load_commit_graph_one(struct repository *r,
+ const char *graph_file,
struct object_directory *odb)
{
@@ -464,7 +470,7 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file,
if (!open_ok)
return NULL;
- g = load_commit_graph_one_fd_st(fd, &st, odb);
+ g = load_commit_graph_one_fd_st(r, fd, &st, odb);
if (g)
g->filename = xstrdup(graph_file);
@@ -476,7 +482,7 @@ static struct commit_graph *load_commit_graph_v1(struct repository *r,
struct object_directory *odb)
{
char *graph_name = get_commit_graph_filename(odb);
- struct commit_graph *g = load_commit_graph_one(graph_name, odb);
+ struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
free(graph_name);
return g;
@@ -557,7 +563,7 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r,
valid = 0;
for (odb = r->objects->odb; odb; odb = odb->next) {
char *graph_name = get_split_graph_filename(odb, line.buf);
- struct commit_graph *g = load_commit_graph_one(graph_name, odb);
+ struct commit_graph *g = load_commit_graph_one(r, graph_name, odb);
free(graph_name);
@@ -667,6 +673,17 @@ int generation_numbers_enabled(struct repository *r)
return !!first_generation;
}
+struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
+{
+ struct commit_graph *g = r->objects->commit_graph;
+ while (g) {
+ if (g->bloom_filter_settings)
+ return g->bloom_filter_settings;
+ g = g->base_graph;
+ }
+ return NULL;
+}
+
static void close_commit_graph_one(struct commit_graph *g)
{
if (!g)
@@ -952,9 +969,14 @@ struct write_commit_graph_context {
changed_paths:1,
order_by_pack:1;
- const struct split_commit_graph_opts *split_opts;
+ const struct commit_graph_opts *opts;
size_t total_bloom_filter_data_size;
const struct bloom_filter_settings *bloom_settings;
+
+ int count_bloom_filter_computed;
+ int count_bloom_filter_not_computed;
+ int count_bloom_filter_trunc_empty;
+ int count_bloom_filter_trunc_large;
};
static int write_graph_chunk_fanout(struct hashfile *f,
@@ -1166,7 +1188,7 @@ static int write_graph_chunk_bloom_indexes(struct hashfile *f,
uint32_t cur_pos = 0;
while (list < last) {
- struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
+ struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
size_t len = filter ? filter->len : 0;
cur_pos += len;
display_progress(ctx->progress, ++ctx->progress_cnt);
@@ -1185,6 +1207,7 @@ static void trace2_bloom_filter_settings(struct write_commit_graph_context *ctx)
jw_object_intmax(&jw, "hash_version", ctx->bloom_settings->hash_version);
jw_object_intmax(&jw, "num_hashes", ctx->bloom_settings->num_hashes);
jw_object_intmax(&jw, "bits_per_entry", ctx->bloom_settings->bits_per_entry);
+ jw_object_intmax(&jw, "max_changed_paths", ctx->bloom_settings->max_changed_paths);
jw_end(&jw);
trace2_data_json("bloom", ctx->r, "settings", &jw);
@@ -1205,7 +1228,7 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
while (list < last) {
- struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
+ struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
size_t len = filter ? filter->len : 0;
display_progress(ctx->progress, ++ctx->progress_cnt);
@@ -1270,8 +1293,8 @@ static void close_reachable(struct write_commit_graph_context *ctx)
{
int i;
struct commit *commit;
- enum commit_graph_split_flags flags = ctx->split_opts ?
- ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
+ enum commit_graph_split_flags flags = ctx->opts ?
+ ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
if (ctx->report_progress)
ctx->progress = start_delayed_progress(
@@ -1375,11 +1398,24 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
stop_progress(&ctx->progress);
}
+static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx)
+{
+ trace2_data_intmax("commit-graph", ctx->r, "filter-computed",
+ ctx->count_bloom_filter_computed);
+ trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed",
+ ctx->count_bloom_filter_not_computed);
+ trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-empty",
+ ctx->count_bloom_filter_trunc_empty);
+ trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
+ ctx->count_bloom_filter_trunc_large);
+}
+
static void compute_bloom_filters(struct write_commit_graph_context *ctx)
{
int i;
struct progress *progress = NULL;
struct commit **sorted_commits;
+ int max_new_filters;
init_bloom_filters();
@@ -1396,13 +1432,34 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
else
QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
+ max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
+ ctx->opts->max_new_filters : ctx->commits.nr;
+
for (i = 0; i < ctx->commits.nr; i++) {
+ enum bloom_filter_computed computed = 0;
struct commit *c = sorted_commits[i];
- struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
- ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
+ struct bloom_filter *filter = get_or_compute_bloom_filter(
+ ctx->r,
+ c,
+ ctx->count_bloom_filter_computed < max_new_filters,
+ ctx->bloom_settings,
+ &computed);
+ if (computed & BLOOM_COMPUTED) {
+ ctx->count_bloom_filter_computed++;
+ if (computed & BLOOM_TRUNC_EMPTY)
+ ctx->count_bloom_filter_trunc_empty++;
+ if (computed & BLOOM_TRUNC_LARGE)
+ ctx->count_bloom_filter_trunc_large++;
+ } else if (computed & BLOOM_NOT_COMPUTED)
+ ctx->count_bloom_filter_not_computed++;
+ ctx->total_bloom_filter_data_size += filter
+ ? sizeof(unsigned char) * filter->len : 0;
display_progress(progress, i + 1);
}
+ if (trace2_is_enabled())
+ trace2_bloom_filter_write_statistics(ctx);
+
free(sorted_commits);
stop_progress(&progress);
}
@@ -1431,7 +1488,7 @@ static int add_ref_to_set(const char *refname,
int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags,
- const struct split_commit_graph_opts *split_opts)
+ const struct commit_graph_opts *opts)
{
struct oidset commits = OIDSET_INIT;
struct refs_cb_data data;
@@ -1448,7 +1505,7 @@ int write_commit_graph_reachable(struct object_directory *odb,
stop_progress(&data.progress);
result = write_commit_graph(odb, NULL, &commits,
- flags, split_opts);
+ flags, opts);
oidset_clear(&commits);
return result;
@@ -1563,8 +1620,8 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx)
static void copy_oids_to_commits(struct write_commit_graph_context *ctx)
{
uint32_t i;
- enum commit_graph_split_flags flags = ctx->split_opts ?
- ctx->split_opts->flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
+ enum commit_graph_split_flags flags = ctx->opts ?
+ ctx->opts->split_flags : COMMIT_GRAPH_SPLIT_UNSPECIFIED;
ctx->num_extra_edges = 0;
if (ctx->report_progress)
@@ -1646,15 +1703,6 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
int num_chunks = 3;
uint64_t chunk_offset;
struct object_id file_hash;
- struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
-
- if (!ctx->bloom_settings) {
- bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
- bloom_settings.bits_per_entry);
- bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
- bloom_settings.num_hashes);
- ctx->bloom_settings = &bloom_settings;
- }
if (ctx->split) {
struct strbuf tmp_file = STRBUF_INIT;
@@ -1858,13 +1906,13 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx)
int max_commits = 0;
int size_mult = 2;
- if (ctx->split_opts) {
- max_commits = ctx->split_opts->max_commits;
+ if (ctx->opts) {
+ max_commits = ctx->opts->max_commits;
- if (ctx->split_opts->size_multiple)
- size_mult = ctx->split_opts->size_multiple;
+ if (ctx->opts->size_multiple)
+ size_mult = ctx->opts->size_multiple;
- flags = ctx->split_opts->flags;
+ flags = ctx->opts->split_flags;
}
g = ctx->r->objects->commit_graph;
@@ -2042,8 +2090,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx)
size_t dirnamelen;
timestamp_t expire_time = time(NULL);
- if (ctx->split_opts && ctx->split_opts->expire_time)
- expire_time = ctx->split_opts->expire_time;
+ if (ctx->opts && ctx->opts->expire_time)
+ expire_time = ctx->opts->expire_time;
if (!ctx->split) {
char *chain_file_name = get_commit_graph_chain_filename(ctx->odb);
unlink(chain_file_name);
@@ -2094,12 +2142,13 @@ int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes,
struct oidset *commits,
enum commit_graph_write_flags flags,
- const struct split_commit_graph_opts *split_opts)
+ const struct commit_graph_opts *opts)
{
struct write_commit_graph_context *ctx;
uint32_t i, count_distinct = 0;
int res = 0;
int replace = 0;
+ struct bloom_filter_settings bloom_settings = DEFAULT_BLOOM_FILTER_SETTINGS;
if (!commit_graph_compatible(the_repository))
return 0;
@@ -2110,9 +2159,17 @@ int write_commit_graph(struct object_directory *odb,
ctx->append = flags & COMMIT_GRAPH_WRITE_APPEND ? 1 : 0;
ctx->report_progress = flags & COMMIT_GRAPH_WRITE_PROGRESS ? 1 : 0;
ctx->split = flags & COMMIT_GRAPH_WRITE_SPLIT ? 1 : 0;
- ctx->split_opts = split_opts;
+ ctx->opts = opts;
ctx->total_bloom_filter_data_size = 0;
+ bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
+ bloom_settings.bits_per_entry);
+ bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
+ bloom_settings.num_hashes);
+ bloom_settings.max_changed_paths = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS",
+ bloom_settings.max_changed_paths);
+ ctx->bloom_settings = &bloom_settings;
+
if (flags & COMMIT_GRAPH_WRITE_BLOOM_FILTERS)
ctx->changed_paths = 1;
if (!(flags & COMMIT_GRAPH_NO_WRITE_BLOOM_FILTERS)) {
@@ -2150,15 +2207,15 @@ int write_commit_graph(struct object_directory *odb,
}
}
- if (ctx->split_opts)
- replace = ctx->split_opts->flags & COMMIT_GRAPH_SPLIT_REPLACE;
+ if (ctx->opts)
+ replace = ctx->opts->split_flags & COMMIT_GRAPH_SPLIT_REPLACE;
}
ctx->approx_nr_objects = approximate_object_count();
ctx->oids.alloc = ctx->approx_nr_objects / 32;
- if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits)
- ctx->oids.alloc = split_opts->max_commits;
+ if (ctx->split && opts && ctx->oids.alloc > opts->max_commits)
+ ctx->oids.alloc = opts->max_commits;
if (ctx->append) {
prepare_commit_graph_one(ctx->r, ctx->odb);
diff --git a/commit-graph.h b/commit-graph.h
index 765221cdcc..f8e92500c6 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -76,11 +76,13 @@ struct commit_graph {
struct bloom_filter_settings *bloom_filter_settings;
};
-struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st,
+struct commit_graph *load_commit_graph_one_fd_st(struct repository *r,
+ int fd, struct stat *st,
struct object_directory *odb);
struct commit_graph *read_commit_graph_one(struct repository *r,
struct object_directory *odb);
-struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size);
+struct commit_graph *parse_commit_graph(struct repository *r,
+ void *graph_map, size_t graph_size);
/*
* Return 1 if and only if the repository has a commit-graph
@@ -88,6 +90,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size);
*/
int generation_numbers_enabled(struct repository *r);
+struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r);
+
enum commit_graph_write_flags {
COMMIT_GRAPH_WRITE_APPEND = (1 << 0),
COMMIT_GRAPH_WRITE_PROGRESS = (1 << 1),
@@ -102,11 +106,12 @@ enum commit_graph_split_flags {
COMMIT_GRAPH_SPLIT_REPLACE = 2
};
-struct split_commit_graph_opts {
+struct commit_graph_opts {
int size_multiple;
int max_commits;
timestamp_t expire_time;
- enum commit_graph_split_flags flags;
+ enum commit_graph_split_flags split_flags;
+ int max_new_filters;
};
/*
@@ -117,12 +122,12 @@ struct split_commit_graph_opts {
*/
int write_commit_graph_reachable(struct object_directory *odb,
enum commit_graph_write_flags flags,
- const struct split_commit_graph_opts *split_opts);
+ const struct commit_graph_opts *opts);
int write_commit_graph(struct object_directory *odb,
struct string_list *pack_indexes,
struct oidset *commits,
enum commit_graph_write_flags flags,
- const struct split_commit_graph_opts *split_opts);
+ const struct commit_graph_opts *opts);
#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0)
diff --git a/diff.h b/diff.h
index 49242d2733..3de343270f 100644
--- a/diff.h
+++ b/diff.h
@@ -287,8 +287,6 @@ struct diff_options {
/* If non-zero, then stop computing after this many changes. */
int max_changes;
- /* For internal use only. */
- int num_changes;
int ita_invisible_in_index;
/* white-space error highlighting */
diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c
index 430817214d..e7cf6d5b0f 100644
--- a/fuzz-commit-graph.c
+++ b/fuzz-commit-graph.c
@@ -1,7 +1,8 @@
#include "commit-graph.h"
#include "repository.h"
-struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size);
+struct commit_graph *parse_commit_graph(struct repository *r,
+ void *graph_map, size_t graph_size);
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
@@ -10,7 +11,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
struct commit_graph *g;
initialize_the_repository();
- g = parse_commit_graph((void *)data, size);
+ g = parse_commit_graph(the_repository, (void *)data, size);
repo_clear(the_repository);
free_commit_graph(g);
diff --git a/line-log.c b/line-log.c
index bf73ea95ac..68eeb425f8 100644
--- a/line-log.c
+++ b/line-log.c
@@ -1159,7 +1159,7 @@ static int bloom_filter_check(struct rev_info *rev,
return 1;
if (!rev->bloom_filter_settings ||
- !(filter = get_bloom_filter(rev->repo, commit, 0)))
+ !(filter = get_bloom_filter(rev->repo, commit)))
return 1;
if (!range)
diff --git a/repo-settings.c b/repo-settings.c
index aa61a35338..88ccce2036 100644
--- a/repo-settings.c
+++ b/repo-settings.c
@@ -17,9 +17,12 @@ void prepare_repo_settings(struct repository *r)
if (!repo_config_get_bool(r, "core.commitgraph", &value))
r->settings.core_commit_graph = value;
+ if (!repo_config_get_bool(r, "commitgraph.readchangedpaths", &value))
+ r->settings.commit_graph_read_changed_paths = value;
if (!repo_config_get_bool(r, "gc.writecommitgraph", &value))
r->settings.gc_write_commit_graph = value;
UPDATE_DEFAULT_BOOL(r->settings.core_commit_graph, 1);
+ UPDATE_DEFAULT_BOOL(r->settings.commit_graph_read_changed_paths, 1);
UPDATE_DEFAULT_BOOL(r->settings.gc_write_commit_graph, 1);
if (!repo_config_get_int(r, "index.version", &value))
diff --git a/repository.h b/repository.h
index 628c834367..bacf843d46 100644
--- a/repository.h
+++ b/repository.h
@@ -30,6 +30,7 @@ struct repo_settings {
int initialized;
int core_commit_graph;
+ int commit_graph_read_changed_paths;
int gc_write_commit_graph;
int fetch_write_commit_graph;
diff --git a/revision.c b/revision.c
index 067030e64c..d9dc5781ac 100644
--- a/revision.c
+++ b/revision.c
@@ -681,10 +681,7 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
repo_parse_commit(revs->repo, revs->commits->item);
- if (!revs->repo->objects->commit_graph)
- return;
-
- revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings;
+ revs->bloom_filter_settings = get_bloom_filter_settings(revs->repo);
if (!revs->bloom_filter_settings)
return;
@@ -755,7 +752,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
if (commit_graph_generation(commit) == GENERATION_NUMBER_INFINITY)
return -1;
- filter = get_bloom_filter(revs->repo, commit, 0);
+ filter = get_bloom_filter(revs->repo, commit);
if (!filter) {
count_bloom_filter_not_present++;
diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c
index 5e77d56f59..46e97b04eb 100644
--- a/t/helper/test-bloom.c
+++ b/t/helper/test-bloom.c
@@ -39,7 +39,9 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
struct bloom_filter *filter;
setup_git_directory();
c = lookup_commit(the_repository, commit_oid);
- filter = get_bloom_filter(the_repository, c, 1);
+ filter = get_or_compute_bloom_filter(the_repository, c, 1,
+ &settings,
+ NULL);
print_bloom_filter(filter);
}
diff --git a/t/helper/test-read-graph.c b/t/helper/test-read-graph.c
index 6d0c962438..5f585a1725 100644
--- a/t/helper/test-read-graph.c
+++ b/t/helper/test-read-graph.c
@@ -12,11 +12,12 @@ int cmd__read_graph(int argc, const char **argv)
setup_git_directory();
odb = the_repository->objects->odb;
+ prepare_repo_settings(the_repository);
+
graph = read_commit_graph_one(the_repository, odb);
if (!graph)
return 1;
-
printf("header: %08x %d %d %d %d\n",
ntohl(*(uint32_t*)graph->data),
*(unsigned char*)(graph->data + 4),
diff --git a/t/t0095-bloom.sh b/t/t0095-bloom.sh
index 232ba2c485..7e4ab1795f 100755
--- a/t/t0095-bloom.sh
+++ b/t/t0095-bloom.sh
@@ -71,8 +71,8 @@ test_expect_success 'get bloom filters for commit with no changes' '
git init &&
git commit --allow-empty -m "c0" &&
cat >expect <<-\EOF &&
- Filter_Length:0
- Filter_Data:
+ Filter_Length:1
+ Filter_Data:00|
EOF
test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
test_cmp expect actual
@@ -107,8 +107,8 @@ test_expect_success EXPENSIVE 'get bloom filter for commit with 513 changes' '
git add bigDir &&
git commit -m "commit with 513 changes" &&
cat >expect <<-\EOF &&
- Filter_Length:0
- Filter_Data:
+ Filter_Length:1
+ Filter_Data:ff|
EOF
test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
test_cmp expect actual
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 4bb9e9dbe2..d11040ce41 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -30,6 +30,7 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
rm file_to_be_deleted &&
git add . &&
git commit -m "file removed" &&
+ git commit --allow-empty -m "empty" &&
git commit-graph write --reachable --changed-paths &&
test_oid_cache <<-EOF
@@ -37,6 +38,7 @@ test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
oid_version sha256:2
EOF
'
+
graph_read_expect () {
NUM_CHUNKS=5
cat >expect <<- EOF
@@ -49,7 +51,7 @@ graph_read_expect () {
}
test_expect_success 'commit-graph write wrote out the bloom chunks' '
- graph_read_expect 15
+ graph_read_expect 16
'
# Turn off any inherited trace2 settings for this test.
@@ -58,14 +60,14 @@ sane_unset GIT_TRACE2_PERF_BRIEF
sane_unset GIT_TRACE2_CONFIG_PARAMS
setup () {
- rm "$TRASH_DIRECTORY/trace.perf"
+ rm -f "$TRASH_DIRECTORY/trace.perf" &&
git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
}
test_bloom_filters_used () {
log_args=$1
- bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"maybe\""
+ bloom_trace_prefix="statistics:{\"filter_not_present\":${2:-0},\"maybe\""
setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom &&
@@ -95,7 +97,9 @@ do
"--ancestry-path side..master"
do
test_expect_success "git log option: $option for path: $path" '
- test_bloom_filters_used "$option -- $path"
+ test_bloom_filters_used "$option -- $path" &&
+ test_config commitgraph.readChangedPaths false &&
+ test_bloom_filters_not_used "$option -- $path"
'
done
done
@@ -139,8 +143,11 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters
test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
'
-test_expect_success 'Do not use Bloom filters if the latest graph does not have Bloom filters.' '
- test_bloom_filters_not_used "-- A/B"
+test_expect_success 'use Bloom filters even if the latest graph does not have Bloom filters' '
+ # Ensure that the number of empty filters is equal to the number of
+ # filters in the latest graph layer to prove that they are loaded (and
+ # ignored).
+ test_bloom_filters_used "-- A/B" 3
'
test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
@@ -151,7 +158,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
test_bloom_filters_used_when_some_filters_are_missing () {
log_args=$1
- bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":8"
+ bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
setup "$log_args" &&
grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
test_cmp log_wo_bloom log_w_bloom
@@ -169,31 +176,230 @@ test_expect_success 'persist filter settings' '
GIT_TEST_BLOOM_SETTINGS_NUM_HASHES=9 \
GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY=15 \
git commit-graph write --reachable --changed-paths &&
- grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2.txt &&
+ grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15,\"max_changed_paths\":512" trace2.txt &&
GIT_TRACE2_EVENT="$(pwd)/trace2-auto.txt" \
GIT_TRACE2_EVENT_NESTING=5 \
git commit-graph write --reachable --changed-paths &&
- grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15}" trace2-auto.txt
+ grep "{\"hash_version\":1,\"num_hashes\":9,\"bits_per_entry\":15,\"max_changed_paths\":512" trace2-auto.txt
'
+test_max_changed_paths () {
+ grep "\"max_changed_paths\":$1" $2
+}
+
+test_filter_not_computed () {
+ grep "\"key\":\"filter-not-computed\",\"value\":\"$1\"" $2
+}
+
+test_filter_computed () {
+ grep "\"key\":\"filter-computed\",\"value\":\"$1\"" $2
+}
+
+test_filter_trunc_empty () {
+ grep "\"key\":\"filter-trunc-empty\",\"value\":\"$1\"" $2
+}
+
+test_filter_trunc_large () {
+ grep "\"key\":\"filter-trunc-large\",\"value\":\"$1\"" $2
+}
+
test_expect_success 'correctly report changes over limit' '
- git init 513changes &&
+ git init limits &&
(
- cd 513changes &&
- for i in $(test_seq 1 513)
+ cd limits &&
+ mkdir d &&
+ mkdir d/e &&
+
+ for i in $(test_seq 1 2)
do
- echo $i >file$i.txt || return 1
+ printf $i >d/file$i.txt &&
+ printf $i >d/e/file$i.txt || return 1
done &&
- git add . &&
+
+ mkdir mode &&
+ printf bash >mode/script.sh &&
+
+ mkdir foo &&
+ touch foo/bar &&
+ touch foo.txt &&
+
+ git add d foo foo.txt mode &&
git commit -m "files" &&
- git commit-graph write --reachable --changed-paths &&
- for i in $(test_seq 1 513)
+
+ # Commit has 7 file and 4 directory adds
+ GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS=10 \
+ GIT_TRACE2_EVENT="$(pwd)/trace" \
+ git commit-graph write --reachable --changed-paths &&
+ test_max_changed_paths 10 trace &&
+ test_filter_computed 1 trace &&
+ test_filter_trunc_large 1 trace &&
+
+ for path in $(git ls-tree -r --name-only HEAD)
+ do
+ git -c commitGraph.readChangedPaths=false log \
+ -- $path >expect &&
+ git log -- $path >actual &&
+ test_cmp expect actual || return 1
+ done &&
+
+ # Make a variety of path changes
+ printf new1 >d/e/file1.txt &&
+ printf new2 >d/file2.txt &&
+ rm d/e/file2.txt &&
+ rm -r foo &&
+ printf text >foo &&
+ mkdir f &&
+ printf new1 >f/file1.txt &&
+
+ # including a mode-only change (counts as modified)
+ git update-index --chmod=+x mode/script.sh &&
+
+ git add foo d f &&
+ git commit -m "complicated" &&
+
+ # start from scratch and rebuild
+ rm -f .git/objects/info/commit-graph &&
+ GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS=10 \
+ GIT_TRACE2_EVENT="$(pwd)/trace-edit" \
+ git commit-graph write --reachable --changed-paths &&
+ test_max_changed_paths 10 trace-edit &&
+ test_filter_computed 2 trace-edit &&
+ test_filter_trunc_large 2 trace-edit &&
+
+ for path in $(git ls-tree -r --name-only HEAD)
+ do
+ git -c commitGraph.readChangedPaths=false log \
+ -- $path >expect &&
+ git log -- $path >actual &&
+ test_cmp expect actual || return 1
+ done &&
+
+ # start from scratch and rebuild
+ rm -f .git/objects/info/commit-graph &&
+ GIT_TEST_BLOOM_SETTINGS_MAX_CHANGED_PATHS=11 \
+ GIT_TRACE2_EVENT="$(pwd)/trace-update" \
+ git commit-graph write --reachable --changed-paths &&
+ test_max_changed_paths 11 trace-update &&
+ test_filter_computed 2 trace-update &&
+ test_filter_trunc_large 0 trace-update &&
+
+ for path in $(git ls-tree -r --name-only HEAD)
do
- git -c core.commitGraph=false log -- file$i.txt >expect &&
- git log -- file$i.txt >actual &&
+ git -c commitGraph.readChangedPaths=false log \
+ -- $path >expect &&
+ git log -- $path >actual &&
test_cmp expect actual || return 1
done
)
'
+test_expect_success 'correctly report commits with no changed paths' '
+ git init empty &&
+ test_when_finished "rm -fr empty" &&
+ (
+ cd empty &&
+
+ git commit --allow-empty -m "initial commit" &&
+
+ GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+ git commit-graph write --reachable --changed-paths &&
+ test_filter_computed 1 trace.event &&
+ test_filter_not_computed 0 trace.event &&
+ test_filter_trunc_empty 1 trace.event &&
+ test_filter_trunc_large 0 trace.event
+ )
+'
+
+test_expect_success 'Bloom generation is limited by --max-new-filters' '
+ (
+ cd limits &&
+ test_commit c2 filter &&
+ test_commit c3 filter &&
+ test_commit c4 no-filter &&
+
+ rm -f trace.event &&
+ GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+ git commit-graph write --reachable --split=replace \
+ --changed-paths --max-new-filters=2 &&
+
+ test_filter_computed 2 trace.event &&
+ test_filter_not_computed 3 trace.event &&
+ test_filter_trunc_empty 0 trace.event &&
+ test_filter_trunc_large 0 trace.event
+ )
+'
+
+test_expect_success 'Bloom generation backfills previously-skipped filters' '
+ # Check specifying commitGraph.maxNewFilters over "git config" works.
+ test_config -C limits commitGraph.maxNewFilters 1 &&
+ (
+ cd limits &&
+
+ rm -f trace.event &&
+ GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+ git commit-graph write --reachable --changed-paths \
+ --split=replace &&
+ test_filter_computed 1 trace.event &&
+ test_filter_not_computed 4 trace.event &&
+ test_filter_trunc_empty 0 trace.event &&
+ test_filter_trunc_large 0 trace.event
+ )
+'
+
+test_expect_success '--max-new-filters overrides configuration' '
+ git init override &&
+ test_when_finished "rm -fr override" &&
+ test_config -C override commitGraph.maxNewFilters 2 &&
+ (
+ cd override &&
+ test_commit one &&
+ test_commit two &&
+
+ rm -f trace.event &&
+ GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+ git commit-graph write --reachable --changed-paths \
+ --max-new-filters=1 &&
+ test_filter_computed 1 trace.event &&
+ test_filter_not_computed 1 trace.event &&
+ test_filter_trunc_empty 0 trace.event &&
+ test_filter_trunc_large 0 trace.event
+ )
+'
+
+test_expect_success 'Bloom generation backfills empty commits' '
+ git init empty &&
+ test_when_finished "rm -fr empty" &&
+ (
+ cd empty &&
+ for i in $(test_seq 1 6)
+ do
+ git commit --allow-empty -m "$i"
+ done &&
+
+ # Generate Bloom filters for empty commits 1-6, two at a time.
+ for i in $(test_seq 1 3)
+ do
+ rm -f trace.event &&
+ GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+ git commit-graph write --reachable \
+ --changed-paths --max-new-filters=2 &&
+ test_filter_computed 2 trace.event &&
+ test_filter_not_computed 4 trace.event &&
+ test_filter_trunc_empty 2 trace.event &&
+ test_filter_trunc_large 0 trace.event
+ done &&
+
+ # Finally, make sure that once all commits have filters, that
+ # none are subsequently recomputed.
+ rm -f trace.event &&
+ GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+ git commit-graph write --reachable \
+ --changed-paths --max-new-filters=2 &&
+ test_filter_computed 0 trace.event &&
+ test_filter_not_computed 6 trace.event &&
+ test_filter_trunc_empty 0 trace.event &&
+ test_filter_trunc_large 0 trace.event
+ )
+'
+
test_done
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index 18216463c7..c334ee9155 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -427,4 +427,17 @@ done <<\EOF
0600 -r--------
EOF
+test_expect_success '--split=replace with partial Bloom data' '
+ rm -rf $graphdir $infodir/commit-graph &&
+ git reset --hard commits/3 &&
+ git rev-list -1 HEAD~2 >a &&
+ git rev-list -1 HEAD~1 >b &&
+ git commit-graph write --split=no-merge --stdin-commits --changed-paths <a &&
+ git commit-graph write --split=no-merge --stdin-commits <b &&
+ git commit-graph write --split=replace --stdin-commits --changed-paths <c &&
+ ls $graphdir/graph-*.graph >graph-files &&
+ test_line_count = 1 graph-files &&
+ verify_chain_files_exist $graphdir
+'
+
test_done
diff --git a/tree-diff.c b/tree-diff.c
index 6ebad1a46f..7cebbb327e 100644
--- a/tree-diff.c
+++ b/tree-diff.c
@@ -434,7 +434,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
if (diff_can_quit_early(opt))
break;
- if (opt->max_changes && opt->num_changes > opt->max_changes)
+ if (opt->max_changes && diff_queued_diff.nr > opt->max_changes)
break;
if (opt->pathspec.nr) {
@@ -521,7 +521,6 @@ static struct combine_diff_path *ll_diff_tree_paths(
/* t↓ */
update_tree_entry(&t);
- opt->num_changes++;
}
/* t > p[imin] */
@@ -539,7 +538,6 @@ static struct combine_diff_path *ll_diff_tree_paths(
skip_emit_tp:
/* ∀ pi=p[imin] pi↓ */
update_tp_entries(tp, nparent);
- opt->num_changes++;
}
}
@@ -557,7 +555,6 @@ struct combine_diff_path *diff_tree_paths(
const struct object_id **parents_oid, int nparent,
struct strbuf *base, struct diff_options *opt)
{
- opt->num_changes = 0;
p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt);
/*