summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--blame.c137
-rw-r--r--blame.h6
-rw-r--r--builtin/blame.c10
-rw-r--r--builtin/commit.c4
-rw-r--r--builtin/merge.c7
-rw-r--r--commit-graph.c14
-rw-r--r--commit-graph.h9
-rw-r--r--revision.c19
-rw-r--r--t/helper/test-bloom.c28
9 files changed, 211 insertions, 23 deletions
diff --git a/blame.c b/blame.c
index 29770e5c81..da7e28800e 100644
--- a/blame.c
+++ b/blame.c
@@ -9,6 +9,8 @@
#include "blame.h"
#include "alloc.h"
#include "commit-slab.h"
+#include "bloom.h"
+#include "commit-graph.h"
define_commit_slab(blame_suspects, struct blame_origin *);
static struct blame_suspects blame_suspects;
@@ -1246,13 +1248,74 @@ static int fill_blob_sha1_and_mode(struct repository *r,
return -1;
}
+struct blame_bloom_data {
+ /*
+ * Changed-path Bloom filter keys. These can help prevent
+ * computing diffs against first parents, but we need to
+ * expand the list as code is moved or files are renamed.
+ */
+ struct bloom_filter_settings *settings;
+ struct bloom_key **keys;
+ int nr;
+ int alloc;
+};
+
+static int bloom_count_queries = 0;
+static int bloom_count_no = 0;
+static int maybe_changed_path(struct repository *r,
+ struct blame_origin *origin,
+ struct blame_bloom_data *bd)
+{
+ int i;
+ struct bloom_filter *filter;
+
+ if (!bd)
+ return 1;
+
+ if (origin->commit->generation == GENERATION_NUMBER_INFINITY)
+ return 1;
+
+ filter = get_bloom_filter(r, origin->commit, 0);
+
+ if (!filter)
+ return 1;
+
+ bloom_count_queries++;
+ for (i = 0; i < bd->nr; i++) {
+ if (bloom_filter_contains(filter,
+ bd->keys[i],
+ bd->settings))
+ return 1;
+ }
+
+ bloom_count_no++;
+ return 0;
+}
+
+static void add_bloom_key(struct blame_bloom_data *bd,
+ const char *path)
+{
+ if (!bd)
+ return;
+
+ if (bd->nr >= bd->alloc) {
+ bd->alloc *= 2;
+ REALLOC_ARRAY(bd->keys, bd->alloc);
+ }
+
+ bd->keys[bd->nr] = xmalloc(sizeof(struct bloom_key));
+ fill_bloom_key(path, strlen(path), bd->keys[bd->nr], bd->settings);
+ bd->nr++;
+}
+
/*
* We have an origin -- check if the same path exists in the
* parent and return an origin structure to represent it.
*/
static struct blame_origin *find_origin(struct repository *r,
struct commit *parent,
- struct blame_origin *origin)
+ struct blame_origin *origin,
+ struct blame_bloom_data *bd)
{
struct blame_origin *porigin;
struct diff_options diff_opts;
@@ -1286,10 +1349,18 @@ static struct blame_origin *find_origin(struct repository *r,
if (is_null_oid(&origin->commit->object.oid))
do_diff_cache(get_commit_tree_oid(parent), &diff_opts);
- else
- diff_tree_oid(get_commit_tree_oid(parent),
- get_commit_tree_oid(origin->commit),
- "", &diff_opts);
+ else {
+ int compute_diff = 1;
+ if (origin->commit->parents &&
+ !oidcmp(&parent->object.oid,
+ &origin->commit->parents->item->object.oid))
+ compute_diff = maybe_changed_path(r, origin, bd);
+
+ if (compute_diff)
+ diff_tree_oid(get_commit_tree_oid(parent),
+ get_commit_tree_oid(origin->commit),
+ "", &diff_opts);
+ }
diffcore_std(&diff_opts);
if (!diff_queued_diff.nr) {
@@ -1341,7 +1412,8 @@ static struct blame_origin *find_origin(struct repository *r,
*/
static struct blame_origin *find_rename(struct repository *r,
struct commit *parent,
- struct blame_origin *origin)
+ struct blame_origin *origin,
+ struct blame_bloom_data *bd)
{
struct blame_origin *porigin = NULL;
struct diff_options diff_opts;
@@ -1366,6 +1438,7 @@ static struct blame_origin *find_rename(struct repository *r,
struct diff_filepair *p = diff_queued_diff.queue[i];
if ((p->status == 'R' || p->status == 'C') &&
!strcmp(p->two->path, origin->path)) {
+ add_bloom_key(bd, p->one->path);
porigin = get_origin(parent, p->one->path);
oidcpy(&porigin->blob_oid, &p->one->oid);
porigin->mode = p->one->mode;
@@ -2332,6 +2405,11 @@ static void distribute_blame(struct blame_scoreboard *sb, struct blame_entry *bl
#define MAXSG 16
+typedef struct blame_origin *(*blame_find_alg)(struct repository *,
+ struct commit *,
+ struct blame_origin *,
+ struct blame_bloom_data *);
+
static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin, int opt)
{
struct rev_info *revs = sb->revs;
@@ -2356,8 +2434,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
* common cases, then we look for renames in the second pass.
*/
for (pass = 0; pass < 2 - sb->no_whole_file_rename; pass++) {
- struct blame_origin *(*find)(struct repository *, struct commit *, struct blame_origin *);
- find = pass ? find_rename : find_origin;
+ blame_find_alg find = pass ? find_rename : find_origin;
for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
i < num_sg && sg;
@@ -2369,7 +2446,7 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
continue;
if (parse_commit(p))
continue;
- porigin = find(sb->repo, p, origin);
+ porigin = find(sb->repo, p, origin, sb->bloom_data);
if (!porigin)
continue;
if (oideq(&porigin->blob_oid, &origin->blob_oid)) {
@@ -2809,3 +2886,45 @@ struct blame_entry *blame_entry_prepend(struct blame_entry *head,
blame_origin_incref(o);
return new_head;
}
+
+void setup_blame_bloom_data(struct blame_scoreboard *sb,
+ const char *path)
+{
+ struct blame_bloom_data *bd;
+
+ if (!sb->repo->objects->commit_graph)
+ return;
+
+ if (!sb->repo->objects->commit_graph->bloom_filter_settings)
+ return;
+
+ bd = xmalloc(sizeof(struct blame_bloom_data));
+
+ bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings;
+
+ bd->alloc = 4;
+ bd->nr = 0;
+ ALLOC_ARRAY(bd->keys, bd->alloc);
+
+ add_bloom_key(bd, path);
+
+ sb->bloom_data = bd;
+}
+
+void cleanup_scoreboard(struct blame_scoreboard *sb)
+{
+ if (sb->bloom_data) {
+ int i;
+ for (i = 0; i < sb->bloom_data->nr; i++) {
+ free(sb->bloom_data->keys[i]->hashes);
+ free(sb->bloom_data->keys[i]);
+ }
+ free(sb->bloom_data->keys);
+ FREE_AND_NULL(sb->bloom_data);
+
+ trace2_data_intmax("blame", sb->repo,
+ "bloom/queries", bloom_count_queries);
+ trace2_data_intmax("blame", sb->repo,
+ "bloom/response-no", bloom_count_no);
+ }
+}
diff --git a/blame.h b/blame.h
index 089b181ff2..b6bbee4147 100644
--- a/blame.h
+++ b/blame.h
@@ -100,6 +100,8 @@ struct blame_entry {
int unblamable;
};
+struct blame_bloom_data;
+
/*
* The current state of the blame assignment.
*/
@@ -156,6 +158,7 @@ struct blame_scoreboard {
void(*found_guilty_entry)(struct blame_entry *, void *);
void *found_guilty_entry_data;
+ struct blame_bloom_data *bloom_data;
};
/*
@@ -180,6 +183,9 @@ void init_scoreboard(struct blame_scoreboard *sb);
void setup_scoreboard(struct blame_scoreboard *sb,
const char *path,
struct blame_origin **orig);
+void setup_blame_bloom_data(struct blame_scoreboard *sb,
+ const char *path);
+void cleanup_scoreboard(struct blame_scoreboard *sb);
struct blame_entry *blame_entry_prepend(struct blame_entry *head,
long start, long end,
diff --git a/builtin/blame.c b/builtin/blame.c
index bf1cecdf3f..3c13634f27 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1061,6 +1061,14 @@ parse_done:
string_list_clear(&ignore_revs_file_list, 0);
string_list_clear(&ignore_rev_list, 0);
setup_scoreboard(&sb, path, &o);
+
+ /*
+ * Changed-path Bloom filters are disabled when looking
+ * for copies.
+ */
+ if (!(opt & PICKAXE_BLAME_COPY))
+ setup_blame_bloom_data(&sb, path);
+
lno = sb.num_lines;
if (lno && !range_list.nr)
@@ -1164,5 +1172,7 @@ parse_done:
printf("num get patch: %d\n", sb.num_get_patch);
printf("num commits: %d\n", sb.num_commits);
}
+
+ cleanup_scoreboard(&sb);
return 0;
}
diff --git a/builtin/commit.c b/builtin/commit.c
index 09170f5cfb..4743ea5a4c 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1700,9 +1700,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
"new_index file. Check that disk is not full and quota is\n"
"not exceeded, and then \"git restore --staged :/\" to recover."));
- if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) &&
- write_commit_graph_reachable(the_repository->objects->odb, 0, NULL))
- return 1;
+ git_test_write_commit_graph_or_die();
repo_rerere(the_repository, 0);
run_command_v_opt(argv_gc_auto, RUN_GIT_CMD);
diff --git a/builtin/merge.c b/builtin/merge.c
index f78ac752b7..97066a5632 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -40,6 +40,7 @@
#include "branch.h"
#include "commit-reach.h"
#include "wt-status.h"
+#include "commit-graph.h"
#define DEFAULT_TWOHEAD (1<<0)
#define DEFAULT_OCTOPUS (1<<1)
@@ -1700,9 +1701,11 @@ int cmd_merge(int argc, const char **argv, const char *prefix)
head_commit);
}
- if (squash)
+ if (squash) {
finish(head_commit, remoteheads, NULL, NULL);
- else
+
+ git_test_write_commit_graph_or_die();
+ } else
write_merge_state(remoteheads);
if (merge_was_ok)
diff --git a/commit-graph.c b/commit-graph.c
index 7eb4f22f00..6dc777e2f3 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -19,6 +19,20 @@
#include "bloom.h"
#include "commit-slab.h"
+void git_test_write_commit_graph_or_die(void)
+{
+ int flags = 0;
+ if (!git_env_bool(GIT_TEST_COMMIT_GRAPH, 0))
+ return;
+
+ if (git_env_bool(GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS, 0))
+ flags = COMMIT_GRAPH_WRITE_BLOOM_FILTERS;
+
+ if (write_commit_graph_reachable(the_repository->objects->odb,
+ flags, NULL))
+ die("failed to write commit-graph under GIT_TEST_COMMIT_GRAPH");
+}
+
#define GRAPH_SIGNATURE 0x43475048 /* "CGPH" */
#define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */
#define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */
diff --git a/commit-graph.h b/commit-graph.h
index 183a15ed62..4212766a4f 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -12,6 +12,15 @@
#define GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD "GIT_TEST_COMMIT_GRAPH_DIE_ON_LOAD"
#define GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS "GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS"
+/*
+ * This method is only used to enhance coverage of the commit-graph
+ * feature in the test suite with the GIT_TEST_COMMIT_GRAPH and
+ * GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS environment variables. Do not
+ * call this method oustide of a builtin, and only if you know what
+ * you are doing!
+ */
+void git_test_write_commit_graph_or_die(void);
+
struct commit;
struct bloom_filter_settings;
diff --git a/revision.c b/revision.c
index 43857823bd..60cca8c0b9 100644
--- a/revision.c
+++ b/revision.c
@@ -650,6 +650,20 @@ static void trace2_bloom_filter_statistics_atexit(void)
jw_release(&jw);
}
+static int forbid_bloom_filters(struct pathspec *spec)
+{
+ if (spec->has_wildcard)
+ return 1;
+ if (spec->nr > 1)
+ return 1;
+ if (spec->magic & ~PATHSPEC_LITERAL)
+ return 1;
+ if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
+ return 1;
+
+ return 0;
+}
+
static void prepare_to_use_bloom_filter(struct rev_info *revs)
{
struct pathspec_item *pi;
@@ -659,7 +673,10 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
int len;
if (!revs->commits)
- return;
+ return;
+
+ if (forbid_bloom_filters(&revs->prune_data))
+ return;
repo_parse_commit(revs->repo, revs->commits->item);
diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c
index ce412664ba..77eb27adac 100644
--- a/t/helper/test-bloom.c
+++ b/t/helper/test-bloom.c
@@ -27,7 +27,7 @@ static void print_bloom_filter(struct bloom_filter *filter) {
}
printf("Filter_Length:%d\n", (int)filter->len);
printf("Filter_Data:");
- for (i = 0; i < filter->len; i++){
+ for (i = 0; i < filter->len; i++) {
printf("%02x|", filter->data[i]);
}
printf("\n");
@@ -43,22 +43,32 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
print_bloom_filter(filter);
}
+static const char *bloom_usage = "\n"
+" test-tool bloom get_murmer3 <string>\n"
+" test-tool bloom generate_filter <string> [<string>...]\n"
+" test-tool get_filter_for_commit <commit-hex>\n";
+
int cmd__bloom(int argc, const char **argv)
{
+ if (argc < 2)
+ usage(bloom_usage);
+
if (!strcmp(argv[1], "get_murmur3")) {
- uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
+ uint32_t hashed;
+ if (argc < 3)
+ usage(bloom_usage);
+ hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
}
- if (!strcmp(argv[1], "generate_filter")) {
+ if (!strcmp(argv[1], "generate_filter")) {
struct bloom_filter filter;
int i = 2;
filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
filter.data = xcalloc(filter.len, sizeof(unsigned char));
- if (!argv[2]){
- die("at least one input string expected");
- }
+ if (argc - 1 < i)
+ usage(bloom_usage);
while (argv[i]) {
add_string_to_filter(argv[i], &filter);
@@ -68,9 +78,11 @@ int cmd__bloom(int argc, const char **argv)
print_bloom_filter(&filter);
}
- if (!strcmp(argv[1], "get_filter_for_commit")) {
+ if (!strcmp(argv[1], "get_filter_for_commit")) {
struct object_id oid;
const char *end;
+ if (argc < 3)
+ usage(bloom_usage);
if (parse_oid_hex(argv[2], &oid, &end))
die("cannot parse oid '%s'", argv[2]);
init_bloom_filters();
@@ -78,4 +90,4 @@ int cmd__bloom(int argc, const char **argv)
}
return 0;
-} \ No newline at end of file
+}