diff options
-rw-r--r-- | blame.c | 137 | ||||
-rw-r--r-- | blame.h | 6 | ||||
-rw-r--r-- | builtin/blame.c | 10 | ||||
-rw-r--r-- | builtin/commit.c | 4 | ||||
-rw-r--r-- | builtin/merge.c | 7 | ||||
-rw-r--r-- | commit-graph.c | 14 | ||||
-rw-r--r-- | commit-graph.h | 9 | ||||
-rw-r--r-- | revision.c | 19 | ||||
-rw-r--r-- | t/helper/test-bloom.c | 28 |
9 files changed, 211 insertions, 23 deletions
@@ -9,6 +9,8 @@ #include "blame.h" #include "alloc.h" #include "commit-slab.h" +#include "bloom.h" +#include "commit-graph.h" define_commit_slab(blame_suspects, struct blame_origin *); static struct blame_suspects blame_suspects; @@ -1246,13 +1248,74 @@ static int fill_blob_sha1_and_mode(struct repository *r, return -1; } +struct blame_bloom_data { + /* + * Changed-path Bloom filter keys. These can help prevent + * computing diffs against first parents, but we need to + * expand the list as code is moved or files are renamed. + */ + struct bloom_filter_settings *settings; + struct bloom_key **keys; + int nr; + int alloc; +}; + +static int bloom_count_queries = 0; +static int bloom_count_no = 0; +static int maybe_changed_path(struct repository *r, + struct blame_origin *origin, + struct blame_bloom_data *bd) +{ + int i; + struct bloom_filter *filter; + + if (!bd) + return 1; + + if (origin->commit->generation == GENERATION_NUMBER_INFINITY) + return 1; + + filter = get_bloom_filter(r, origin->commit, 0); + + if (!filter) + return 1; + + bloom_count_queries++; + for (i = 0; i < bd->nr; i++) { + if (bloom_filter_contains(filter, + bd->keys[i], + bd->settings)) + return 1; + } + + bloom_count_no++; + return 0; +} + +static void add_bloom_key(struct blame_bloom_data *bd, + const char *path) +{ + if (!bd) + return; + + if (bd->nr >= bd->alloc) { + bd->alloc *= 2; + REALLOC_ARRAY(bd->keys, bd->alloc); + } + + bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key)); + fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings); + bd->nr++; +} + /* * We have an origin -- check if the same path exists in the * parent and return an origin structure to represent it. */ static struct blame_origin *find_origin(struct repository *r, struct commit *parent, - struct blame_origin *origin) + struct blame_origin *origin, + struct blame_bloom_data *bd) { struct blame_origin *porigin; struct diff_options diff_opts; @@ -1286,10 +1349,18 @@ static struct blame_origin *find_origin(struct repository *r, if (is_null_oid(&origin->commit->object.oid)) do_diff_cache(get_commit_tree_oid(parent), &diff_opts); - else - diff_tree_oid(get_commit_tree_oid(parent), - get_commit_tree_oid(origin->commit), - "", &diff_opts); + else { + int compute_diff = 1; + if (origin->commit->parents && + !oidcmp(&parent->object.oid, + &origin->commit->parents->item->object.oid)) + compute_diff = maybe_changed_path(r, origin, bd); + + if (compute_diff) + diff_tree_oid(get_commit_tree_oid(parent), + get_commit_tree_oid(origin->commit), + "", &diff_opts); + } diffcore_std(&diff_opts); if (!diff_queued_diff.nr) { @@ -1341,7 +1412,8 @@ static struct blame_origin *find_origin(struct repository *r, */ static struct blame_origin *find_rename(struct repository *r, struct commit *parent, - struct blame_origin *origin) + struct blame_origin *origin, + struct blame_bloom_data *bd) { struct blame_origin *porigin = NULL; struct diff_options diff_opts; @@ -1366,6 +1438,7 @@ static struct blame_origin *find_rename(struct repository *r, struct diff_filepair *p = diff_queued_diff.queue[i]; if ((p->status == 'R' || p->status == 'C') && !strcmp(p->two->path, origin->path)) { + add_bloom_key(bd, p->one->path); porigin = get_origin(parent, p->one->path); oidcpy(&porigin->blob_oid, &p->one->oid); porigin->mode = p->one->mode; @@ -2332,6 +2405,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl #define MAXSG 16 +typedef struct blame_origin *(*blame_find_alg)(struct repository *, + struct commit *, + struct blame_origin *, + struct blame_bloom_data *); + static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt) { struct rev_info *revs = sb->revs; @@ -2356,8 +2434,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, * common cases, then we look for renames in the second pass. */ for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) { - struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *); - find = pass ? find_rename : find_origin; + blame_find_alg find = pass ? find_rename : find_origin; for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse); i < num_sg && sg; @@ -2369,7 +2446,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, continue; if (parse_commit(p)) continue; - porigin = find(sb->repo, p, origin); + porigin = find(sb->repo, p, origin, sb->bloom_data); if (!porigin) continue; if (oideq(&porigin->blob_oid, &origin->blob_oid)) { @@ -2809,3 +2886,45 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head, blame_origin_incref(o); return new_head; } + +void setup_blame_bloom_data(struct blame_scoreboard *sb, + const char *path) +{ + struct blame_bloom_data *bd; + + if (!sb->repo->objects->commit_graph) + return; + + if (!sb->repo->objects->commit_graph->bloom_filter_settings) + return; + + bd = xmalloc(sizeof(struct blame_bloom_data)); + + bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings; + + bd->alloc = 4; + bd->nr = 0; + ALLOC_ARRAY(bd->keys, bd->alloc); + + add_bloom_key(bd, path); + + sb->bloom_data = bd; +} + +void cleanup_scoreboard(struct blame_scoreboard *sb) +{ + if (sb->bloom_data) { + int i; + for (i = 0; i < sb->bloom_data->nr; i++) { + free(sb->bloom_data->keys[i]->hashes); + free(sb->bloom_data->keys[i]); + } + free(sb->bloom_data->keys); + FREE_AND_NULL(sb->bloom_data); + + trace2_data_intmax("blame", sb->repo, + "bloom/queries", bloom_count_queries); + trace2_data_intmax("blame", sb->repo, + "bloom/response-no", bloom_count_no); + } +} @@ -100,6 +100,8 @@ struct blame_entry { int unblamable; }; +struct blame_bloom_data; + /* * The current state of the blame assignment. */ @@ -156,6 +158,7 @@ struct blame_scoreboard { void(*found_guilty_entry)(struct blame_entry *, void *); void *found_guilty_entry_data; + struct blame_bloom_data *bloom_data; }; /* @@ -180,6 +183,9 @@ void init_scoreboard(struct blame_scoreboard *sb); void setup_scoreboard(struct blame_scoreboard *sb, const char *path, struct blame_origin **orig); +void setup_blame_bloom_data(struct blame_scoreboard *sb, + const char *path); +void cleanup_scoreboard(struct blame_scoreboard *sb); struct blame_entry *blame_entry_prepend(struct blame_entry *head, long start, long end, diff --git a/builtin/blame.c b/builtin/blame.c index bf1cecdf3f..3c13634f27 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1061,6 +1061,14 @@ parse_done: string_list_clear(&ignore_revs_file_list, 0); string_list_clear(&ignore_rev_list, 0); setup_scoreboard(&sb, path, &o); + + /* + * Changed-path Bloom filters are disabled when looking + * for copies. + */ + if (!(opt & PICKAXE_BLAME_COPY)) + setup_blame_bloom_data(&sb, path); + lno = sb.num_lines; if (lno && !range_list.nr) @@ -1164,5 +1172,7 @@ parse_done: printf("num get patch: %d\n", sb.num_get_patch); printf("num commits: %d\n", sb.num_commits); } + + cleanup_scoreboard(&sb); return 0; } diff --git a/builtin/commit.c b/builtin/commit.c index 09170f5cfb..4743ea5a4c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1700,9 +1700,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "new_index file. Check that disk is not full and quota is\n" "not exceeded, and then \"git restore --staged :/\" to recover.")); - if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(the_repository->objects->odb, 0, NULL)) - return 1; + git_test_write_commit_graph_or_die(); repo_rerere(the_repository, 0); run_command_v_opt(argv_gc_auto, RUN_GIT_CMD); diff --git a/builtin/merge.c b/builtin/merge.c index f78ac752b7..97066a5632 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -40,6 +40,7 @@ #include "branch.h" #include "commit-reach.h" #include "wt-status.h" +#include "commit-graph.h" #define DEFAULT_TWOHEAD (1<<0) #define DEFAULT_OCTOPUS (1<<1) @@ -1700,9 +1701,11 @@ int cmd_merge(int argc, const char **argv, const char *prefix) head_commit); } - if (squash) + if (squash) { finish(head_commit, remoteheads, NULL, NULL); - else + + git_test_write_commit_graph_or_die(); + } else write_merge_state(remoteheads); if (merge_was_ok) diff --git a/commit-graph.c b/commit-graph.c index 7eb4f22f00..6dc777e2f3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -19,6 +19,20 @@ #include "bloom.h" #include "commit-slab.h" +void git_test_write_commit_graph_or_die(void) +{ + int flags = 0; + if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0)) + return; + + if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0)) + flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS; + + if (write_commit_graph_reachable(the_repository->objects->odb, + flags, NULL)) + die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH"); +} + #define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */ #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ diff --git a/commit-graph.h b/commit-graph.h index 183a15ed62..4212766a4f 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -12,6 +12,15 @@ #define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD" #define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS" +/* + * This method is only used to enhance coverage of the commit-graph + * feature in the test suite with the GIT_TEST_COMMIT_GRAPH and + * GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS environment variables. Do not + * call this method oustide of a builtin, and only if you know what + * you are doing! + */ +void git_test_write_commit_graph_or_die(void); + struct commit; struct bloom_filter_settings; diff --git a/revision.c b/revision.c index 43857823bd..60cca8c0b9 100644 --- a/revision.c +++ b/revision.c @@ -650,6 +650,20 @@ static void trace2_bloom_filter_statistics_atexit(void) jw_release(&jw); } +static int forbid_bloom_filters(struct pathspec *spec) +{ + if (spec->has_wildcard) + return 1; + if (spec->nr > 1) + return 1; + if (spec->magic & ~PATHSPEC_LITERAL) + return 1; + if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL)) + return 1; + + return 0; +} + static void prepare_to_use_bloom_filter(struct rev_info *revs) { struct pathspec_item *pi; @@ -659,7 +673,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs) int len; if (!revs->commits) - return; + return; + + if (forbid_bloom_filters(&revs->prune_data)) + return; repo_parse_commit(revs->repo, revs->commits->item); diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c index ce412664ba..77eb27adac 100644 --- a/t/helper/test-bloom.c +++ b/t/helper/test-bloom.c @@ -27,7 +27,7 @@ static void print_bloom_filter(struct bloom_filter *filter) { } printf("Filter_Length:%d\n", (int)filter->len); printf("Filter_Data:"); - for (i = 0; i < filter->len; i++){ + for (i = 0; i < filter->len; i++) { printf("%02x|", filter->data[i]); } printf("\n"); @@ -43,22 +43,32 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid) print_bloom_filter(filter); } +static const char *bloom_usage = "\n" +" test-tool bloom get_murmer3 <string>\n" +" test-tool bloom generate_filter <string> [<string>...]\n" +" test-tool get_filter_for_commit <commit-hex>\n"; + int cmd__bloom(int argc, const char **argv) { + if (argc < 2) + usage(bloom_usage); + if (!strcmp(argv[1], "get_murmur3")) { - uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2])); + uint32_t hashed; + if (argc < 3) + usage(bloom_usage); + hashed = murmur3_seeded(0, argv[2], strlen(argv[2])); printf("Murmur3 Hash with seed=0:0x%08x\n", hashed); } - if (!strcmp(argv[1], "generate_filter")) { + if (!strcmp(argv[1], "generate_filter")) { struct bloom_filter filter; int i = 2; filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; filter.data = xcalloc(filter.len, sizeof(unsigned char)); - if (!argv[2]){ - die("at least one input string expected"); - } + if (argc - 1 < i) + usage(bloom_usage); while (argv[i]) { add_string_to_filter(argv[i], &filter); @@ -68,9 +78,11 @@ int cmd__bloom(int argc, const char **argv) print_bloom_filter(&filter); } - if (!strcmp(argv[1], "get_filter_for_commit")) { + if (!strcmp(argv[1], "get_filter_for_commit")) { struct object_id oid; const char *end; + if (argc < 3) + usage(bloom_usage); if (parse_oid_hex(argv[2], &oid, &end)) die("cannot parse oid '%s'", argv[2]); init_bloom_filters(); @@ -78,4 +90,4 @@ int cmd__bloom(int argc, const char **argv) } return 0; -}
\ No newline at end of file +} |