From aec8fa1f587d68a0e50ada2720c8dc6e09709a9c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 21 Oct 2006 03:30:53 -0700 Subject: git-pickaxe: swap comparison loop used for -C When assigning blames for code movements across file boundaries, we used to iterate over blame entries (i.e. groups of lines to be blamed) in the outer loop and compared each entry with paths in the parent commit in an inner loop. This meant that we opened the blob data from each path number of times. Reorganize the loop so that we read the same path only once, and compare it against all relevant blame entries. This should perform better, but seems to give mixed results, though. Signed-off-by: Junio C Hamano --- builtin-pickaxe.c | 91 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 50 insertions(+), 41 deletions(-) (limited to 'builtin-pickaxe.c') diff --git a/builtin-pickaxe.c b/builtin-pickaxe.c index cf474b0d11..663b96dac5 100644 --- a/builtin-pickaxe.c +++ b/builtin-pickaxe.c @@ -737,11 +737,27 @@ static int find_copy_in_parent(struct scoreboard *sb, struct diff_options diff_opts; const char *paths[1]; struct blame_entry *e; - int i; + int i, j; + struct blame_list { + struct blame_entry *ent; + struct blame_entry split[3]; + } *blame_list; + int num_ents; - if (find_last_in_target(sb, target) < 0) + /* Count the number of entries the target is suspected for, + * and prepare a list of entry and the best split. + */ + for (e = sb->ent, num_ents = 0; e; e = e->next) + if (!e->guilty && !cmp_suspect(e->suspect, target)) + num_ents++; + if (!num_ents) return 1; /* nothing remains for this target */ + blame_list = xcalloc(num_ents, sizeof(struct blame_list)); + for (e = sb->ent, i = 0; e; e = e->next) + if (!e->guilty && !cmp_suspect(e->suspect, target)) + blame_list[i++].ent = e; + diff_setup(&diff_opts); diff_opts.recursive = 1; diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; @@ -761,52 +777,45 @@ static int find_copy_in_parent(struct scoreboard *sb, "", &diff_opts); diffcore_std(&diff_opts); - - /* - * NEEDSWORK: This loop is wasteful in that it opens the same - * blob in the parent number of times. We should swap the - * loop inside out, which would require keeping track of - * "best blame so far" for blame entries that the current - * "target" is being suspected. - */ - - for (e = sb->ent; e; e = e->next) { - struct blame_entry split[3]; - if (e->guilty || cmp_suspect(e->suspect, target)) + for (i = 0; i < diff_queued_diff.nr; i++) { + struct diff_filepair *p = diff_queued_diff.queue[i]; + struct origin *norigin; + mmfile_t file_p; + char type[10]; + char *blob; + struct blame_entry this[3]; + + if (!DIFF_FILE_VALID(p->one)) + continue; /* does not exist in parent */ + if (porigin && !strcmp(p->one->path, porigin->path)) + /* find_move already dealt with this path */ continue; - memset(split, 0, sizeof(split)); - for (i = 0; i < diff_queued_diff.nr; i++) { - struct diff_filepair *p = diff_queued_diff.queue[i]; - struct origin *norigin; - mmfile_t file_p; - char type[10]; - char *blob; - struct blame_entry this[3]; + norigin = get_origin(sb, parent, p->one->path); + hashcpy(norigin->blob_sha1, p->one->sha1); + blob = read_sha1_file(norigin->blob_sha1, type, + (unsigned long *) &file_p.size); + file_p.ptr = blob; + if (!file_p.ptr) + continue; - if (!DIFF_FILE_VALID(p->one)) - continue; /* does not exist in parent */ - if (porigin && !strcmp(p->one->path, porigin->path)) - /* find_move already dealt with this path */ - continue; - norigin = get_origin(sb, parent, p->one->path); - hashcpy(norigin->blob_sha1, p->one->sha1); - blob = read_sha1_file(norigin->blob_sha1, type, - (unsigned long *) &file_p.size); - file_p.ptr = blob; - if (!file_p.ptr) { - free(blob); - continue; - } - find_copy_in_blob(sb, e, norigin, this, &file_p); - copy_split_if_better(sb, split, this); - free(blob); + for (j = 0; j < num_ents; j++) { + find_copy_in_blob(sb, blame_list[j].ent, norigin, + this, &file_p); + copy_split_if_better(sb, blame_list[j].split, + this); } + free(blob); + } + diff_flush(&diff_opts); + + for (j = 0; j < num_ents; j++) { + struct blame_entry *split = blame_list[j].split; if (split[1].suspect && blame_copy_score < ent_score(sb, &split[1])) - split_blame(sb, split, e); + split_blame(sb, split, blame_list[j].ent); } - diff_flush(&diff_opts); + free(blame_list); return 0; } -- cgit v1.2.3