diff options
Diffstat (limited to 'builtin/blame.c')
-rw-r--r-- | builtin/blame.c | 1959 |
1 files changed, 1156 insertions, 803 deletions
diff --git a/builtin/blame.c b/builtin/blame.c index 101535448f..21f42b0b62 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1,10 +1,12 @@ /* * Blame * - * Copyright (c) 2006, Junio C Hamano + * Copyright (c) 2006, 2014 by its authors + * See COPYING for licensing conditions */ #include "cache.h" +#include "refs.h" #include "builtin.h" #include "blob.h" #include "commit.h" @@ -18,16 +20,22 @@ #include "cache-tree.h" #include "string-list.h" #include "mailmap.h" +#include "mergesort.h" #include "parse-options.h" +#include "prio-queue.h" #include "utf8.h" #include "userdiff.h" +#include "line-range.h" +#include "line-log.h" +#include "dir.h" +#include "progress.h" -static char blame_usage[] = "git blame [options] [rev-opts] [rev] [--] file"; +static char blame_usage[] = N_("git blame [<options>] [<rev-opts>] [<rev>] [--] <file>"); static const char *blame_opt_usage[] = { blame_usage, "", - "[rev-opts] are documented in git-rev-list(1)", + N_("<rev-opts> are documented in git-rev-list(1)"), NULL }; @@ -41,8 +49,11 @@ static int reverse; static int blank_boundary; static int incremental; static int xdl_opts; +static int abbrev = -1; +static int no_whole_file_rename; +static int show_progress; -static enum date_mode blame_date_mode = DATE_ISO8601; +static struct date_mode blame_date_mode = { DATE_ISO8601 }; static size_t blame_date_width; static struct string_list mailmap; @@ -70,7 +81,7 @@ static unsigned blame_copy_score; #define BLAME_DEFAULT_MOVE_SCORE 20 #define BLAME_DEFAULT_COPY_SCORE 40 -/* bits #0..7 in revision.h, #8..11 used for merge_bases() in commit.c */ +/* Remember to update object flag allocation in object.h */ #define METAINFO_SHOWN (1u<<12) #define MORE_THAN_ONE_PATH (1u<<13) @@ -79,20 +90,73 @@ static unsigned blame_copy_score; */ struct origin { int refcnt; + /* Record preceding blame record for this blob */ struct origin *previous; + /* origins are put in a list linked via `next' hanging off the + * corresponding commit's util field in order to make finding + * them fast. The presence in this chain does not count + * towards the origin's reference count. It is tempting to + * let it count as long as the commit is pending examination, + * but even under circumstances where the commit will be + * present multiple times in the priority queue of unexamined + * commits, processing the first instance will not leave any + * work requiring the origin data for the second instance. An + * interspersed commit changing that would have to be + * preexisting with a different ancestry and with the same + * commit date in order to wedge itself between two instances + * of the same commit in the priority queue _and_ produce + * blame entries relevant for it. While we don't want to let + * us get tripped up by this case, it certainly does not seem + * worth optimizing for. + */ + struct origin *next; struct commit *commit; + /* `suspects' contains blame entries that may be attributed to + * this origin's commit or to parent commits. When a commit + * is being processed, all suspects will be moved, either by + * assigning them to an origin in a different commit, or by + * shipping them to the scoreboard's ent list because they + * cannot be attributed to a different commit. + */ + struct blame_entry *suspects; mmfile_t file; unsigned char blob_sha1[20]; + unsigned mode; + /* guilty gets set when shipping any suspects to the final + * blame list instead of other commits + */ + char guilty; char path[FLEX_ARRAY]; }; +struct progress_info { + struct progress *progress; + int blamed_lines; +}; + +static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b, long ctxlen, + xdl_emit_hunk_consume_func_t hunk_func, void *cb_data) +{ + xpparam_t xpp = {0}; + xdemitconf_t xecfg = {0}; + xdemitcb_t ecb = {NULL}; + + xpp.flags = xdl_opts; + xecfg.ctxlen = ctxlen; + xecfg.hunk_func = hunk_func; + ecb.priv = cb_data; + return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb); +} + /* * Prepare diff_filespec and convert it using diff textconv API * if the textconv driver exists. * Return 1 if the conversion succeeds, 0 otherwise. */ int textconv_object(const char *path, + unsigned mode, const unsigned char *sha1, + int sha1_valid, char **buf, unsigned long *buf_size) { @@ -100,7 +164,7 @@ int textconv_object(const char *path, struct userdiff_driver *textconv; df = alloc_filespec(path); - fill_filespec(df, sha1, S_IFREG | 0664); + fill_filespec(df, sha1, sha1_valid, mode); textconv = get_textconv(df); if (!textconv) { free_filespec(df); @@ -125,7 +189,7 @@ static void fill_origin_blob(struct diff_options *opt, num_read_blob++; if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) && - textconv_object(o->path, o->blob_sha1, &file->ptr, &file_size)) + textconv_object(o->path, o->mode, o->blob_sha1, 1, &file->ptr, &file_size)) ; else file->ptr = read_sha1_file(o->blob_sha1, &type, &file_size); @@ -155,10 +219,22 @@ static inline struct origin *origin_incref(struct origin *o) static void origin_decref(struct origin *o) { if (o && --o->refcnt <= 0) { + struct origin *p, *l = NULL; if (o->previous) origin_decref(o->previous); free(o->file.ptr); - free(o); + /* Should be present exactly once in commit chain */ + for (p = o->commit->util; p; l = p, p = p->next) { + if (p == o) { + if (l) + l->next = p->next; + else + o->commit->util = p->next; + free(o); + return; + } + } + die("internal error in blame::origin_decref"); } } @@ -172,11 +248,14 @@ static void drop_origin_blob(struct origin *o) /* * Each group of lines is described by a blame_entry; it can be split - * as we pass blame to the parents. They form a linked list in the - * scoreboard structure, sorted by the target line number. + * as we pass blame to the parents. They are arranged in linked lists + * kept as `suspects' of some unprocessed origin, or entered (when the + * blame origin has been finalized) into the scoreboard structure. + * While the scoreboard structure is only sorted at the end of + * processing (according to final image line number), the lists + * attached to an origin are sorted by the target line number. */ struct blame_entry { - struct blame_entry *prev; struct blame_entry *next; /* the first line of this group in the final image; @@ -190,15 +269,6 @@ struct blame_entry { /* the commit that introduced this group into the final image */ struct origin *suspect; - /* true if the suspect is truly guilty; false while we have not - * checked if the group came from one of its parents. - */ - char guilty; - - /* true if the entry has been scanned for copies in the current parent - */ - char scanned; - /* the line number of the first line of this group in the * suspect's file; internally all line numbers are 0 based. */ @@ -211,11 +281,112 @@ struct blame_entry { }; /* + * Any merge of blames happens on lists of blames that arrived via + * different parents in a single suspect. In this case, we want to + * sort according to the suspect line numbers as opposed to the final + * image line numbers. The function body is somewhat longish because + * it avoids unnecessary writes. + */ + +static struct blame_entry *blame_merge(struct blame_entry *list1, + struct blame_entry *list2) +{ + struct blame_entry *p1 = list1, *p2 = list2, + **tail = &list1; + + if (!p1) + return p2; + if (!p2) + return p1; + + if (p1->s_lno <= p2->s_lno) { + do { + tail = &p1->next; + if ((p1 = *tail) == NULL) { + *tail = p2; + return list1; + } + } while (p1->s_lno <= p2->s_lno); + } + for (;;) { + *tail = p2; + do { + tail = &p2->next; + if ((p2 = *tail) == NULL) { + *tail = p1; + return list1; + } + } while (p1->s_lno > p2->s_lno); + *tail = p1; + do { + tail = &p1->next; + if ((p1 = *tail) == NULL) { + *tail = p2; + return list1; + } + } while (p1->s_lno <= p2->s_lno); + } +} + +static void *get_next_blame(const void *p) +{ + return ((struct blame_entry *)p)->next; +} + +static void set_next_blame(void *p1, void *p2) +{ + ((struct blame_entry *)p1)->next = p2; +} + +/* + * Final image line numbers are all different, so we don't need a + * three-way comparison here. + */ + +static int compare_blame_final(const void *p1, const void *p2) +{ + return ((struct blame_entry *)p1)->lno > ((struct blame_entry *)p2)->lno + ? 1 : -1; +} + +static int compare_blame_suspect(const void *p1, const void *p2) +{ + const struct blame_entry *s1 = p1, *s2 = p2; + /* + * to allow for collating suspects, we sort according to the + * respective pointer value as the primary sorting criterion. + * The actual relation is pretty unimportant as long as it + * establishes a total order. Comparing as integers gives us + * that. + */ + if (s1->suspect != s2->suspect) + return (intptr_t)s1->suspect > (intptr_t)s2->suspect ? 1 : -1; + if (s1->s_lno == s2->s_lno) + return 0; + return s1->s_lno > s2->s_lno ? 1 : -1; +} + +static struct blame_entry *blame_sort(struct blame_entry *head, + int (*compare_fn)(const void *, const void *)) +{ + return llist_mergesort (head, get_next_blame, set_next_blame, compare_fn); +} + +static int compare_commits_by_reverse_commit_date(const void *a, + const void *b, + void *c) +{ + return -compare_commits_by_commit_date(a, b, c); +} + +/* * The current state of the blame assignment. */ struct scoreboard { /* the final commit (i.e. where we started digging from) */ struct commit *final; + /* Priority queue for commits with unassigned blame records */ + struct prio_queue commits; struct rev_info *revs; const char *path; @@ -235,15 +406,6 @@ struct scoreboard { int *lineno; }; -static inline int same_suspect(struct origin *a, struct origin *b) -{ - if (a == b) - return 1; - if (a->commit != b->commit) - return 0; - return !strcmp(a->path, b->path); -} - static void sanity_check_refcnt(struct scoreboard *); /* @@ -256,13 +418,10 @@ static void coalesce(struct scoreboard *sb) struct blame_entry *ent, *next; for (ent = sb->ent; ent && (next = ent->next); ent = next) { - if (same_suspect(ent->suspect, next->suspect) && - ent->guilty == next->guilty && + if (ent->suspect == next->suspect && ent->s_lno + ent->num_lines == next->s_lno) { ent->num_lines += next->num_lines; ent->next = next->next; - if (ent->next) - ent->next->prev = ent; origin_decref(next->suspect); free(next); ent->score = 0; @@ -275,6 +434,30 @@ static void coalesce(struct scoreboard *sb) } /* + * Merge the given sorted list of blames into a preexisting origin. + * If there were no previous blames to that commit, it is entered into + * the commit priority queue of the score board. + */ + +static void queue_blames(struct scoreboard *sb, struct origin *porigin, + struct blame_entry *sorted) +{ + if (porigin->suspects) + porigin->suspects = blame_merge(porigin->suspects, sorted); + else { + struct origin *o; + for (o = porigin->commit->util; o; o = o->next) { + if (o->suspects) { + porigin->suspects = sorted; + return; + } + } + porigin->suspects = sorted; + prio_queue_put(&sb->commits, porigin->commit); + } +} + +/* * Given a commit and a path in it, create a new origin structure. * The callers that add blame to the scoreboard should use * get_origin() to obtain shared, refcounted copy instead of calling @@ -283,26 +466,34 @@ static void coalesce(struct scoreboard *sb) static struct origin *make_origin(struct commit *commit, const char *path) { struct origin *o; - o = xcalloc(1, sizeof(*o) + strlen(path) + 1); + FLEX_ALLOC_STR(o, path, path); o->commit = commit; o->refcnt = 1; - strcpy(o->path, path); + o->next = commit->util; + commit->util = o; return o; } /* * Locate an existing origin or create a new one. + * This moves the origin to front position in the commit util list. */ static struct origin *get_origin(struct scoreboard *sb, struct commit *commit, const char *path) { - struct blame_entry *e; + struct origin *o, *l; - for (e = sb->ent; e; e = e->next) { - if (e->suspect->commit == commit && - !strcmp(e->suspect->path, path)) - return origin_incref(e->suspect); + for (o = commit->util, l = NULL; o; l = o, o = o->next) { + if (!strcmp(o->path, path)) { + /* bump to front */ + if (l) { + l->next = o->next; + o->next = commit->util; + commit->util = o; + } + return origin_incref(o); + } } return make_origin(commit, path); } @@ -313,21 +504,23 @@ static struct origin *get_origin(struct scoreboard *sb, * for an origin is also used to pass the blame for the entire file to * the parent to detect the case where a child's blob is identical to * that of its parent's. + * + * This also fills origin->mode for corresponding tree path. */ -static int fill_blob_sha1(struct origin *origin) +static int fill_blob_sha1_and_mode(struct origin *origin) { - unsigned mode; if (!is_null_sha1(origin->blob_sha1)) return 0; - if (get_tree_entry(origin->commit->object.sha1, + if (get_tree_entry(origin->commit->object.oid.hash, origin->path, - origin->blob_sha1, &mode)) + origin->blob_sha1, &origin->mode)) goto error_out; if (sha1_object_info(origin->blob_sha1, NULL) != OBJ_BLOB) goto error_out; return 0; error_out: hashclr(origin->blob_sha1); + origin->mode = S_IFINVALID; return -1; } @@ -339,39 +532,19 @@ static struct origin *find_origin(struct scoreboard *sb, struct commit *parent, struct origin *origin) { - struct origin *porigin = NULL; + struct origin *porigin; struct diff_options diff_opts; const char *paths[2]; - if (parent->util) { - /* - * Each commit object can cache one origin in that - * commit. This is a freestanding copy of origin and - * not refcounted. - */ - struct origin *cached = parent->util; - if (!strcmp(cached->path, origin->path)) { + /* First check any existing origins */ + for (porigin = parent->util; porigin; porigin = porigin->next) + if (!strcmp(porigin->path, origin->path)) { /* * The same path between origin and its parent * without renaming -- the most common case. */ - porigin = get_origin(sb, parent, cached->path); - - /* - * If the origin was newly created (i.e. get_origin - * would call make_origin if none is found in the - * scoreboard), it does not know the blob_sha1, - * so copy it. Otherwise porigin was in the - * scoreboard and already knows blob_sha1. - */ - if (porigin->refcnt == 1) - hashcpy(porigin->blob_sha1, cached->blob_sha1); - return porigin; + return origin_incref (porigin); } - /* otherwise it was not very useful; free it */ - free(parent->util); - parent->util = NULL; - } /* See if the origin->path is different between parent * and origin first. Most of the time they are the @@ -384,15 +557,16 @@ static struct origin *find_origin(struct scoreboard *sb, paths[0] = origin->path; paths[1] = NULL; - diff_tree_setup_paths(paths, &diff_opts); - if (diff_setup_done(&diff_opts) < 0) - die("diff-setup"); + parse_pathspec(&diff_opts.pathspec, + PATHSPEC_ALL_MAGIC & ~PATHSPEC_LITERAL, + PATHSPEC_LITERAL_PATH, "", paths); + diff_setup_done(&diff_opts); - if (is_null_sha1(origin->commit->object.sha1)) - do_diff_cache(parent->tree->object.sha1, &diff_opts); + if (is_null_oid(&origin->commit->object.oid)) + do_diff_cache(parent->tree->object.oid.hash, &diff_opts); else - diff_tree_sha1(parent->tree->object.sha1, - origin->commit->tree->object.sha1, + diff_tree_sha1(parent->tree->object.oid.hash, + origin->commit->tree->object.oid.hash, "", &diff_opts); diffcore_std(&diff_opts); @@ -400,6 +574,7 @@ static struct origin *find_origin(struct scoreboard *sb, /* The path is the same as parent */ porigin = get_origin(sb, parent, origin->path); hashcpy(porigin->blob_sha1, origin->blob_sha1); + porigin->mode = origin->mode; } else { /* * Since origin->path is a pathspec, if the parent @@ -425,6 +600,7 @@ static struct origin *find_origin(struct scoreboard *sb, case 'M': porigin = get_origin(sb, parent, origin->path); hashcpy(porigin->blob_sha1, p->one->sha1); + porigin->mode = p->one->mode; break; case 'A': case 'T': @@ -433,19 +609,7 @@ static struct origin *find_origin(struct scoreboard *sb, } } diff_flush(&diff_opts); - diff_tree_release_paths(&diff_opts); - if (porigin) { - /* - * Create a freestanding copy that is not part of - * the refcounted origin found in the scoreboard, and - * cache it in the commit. - */ - struct origin *cached; - - cached = make_origin(porigin->commit, porigin->path); - hashcpy(cached->blob_sha1, porigin->blob_sha1); - parent->util = cached; - } + free_pathspec(&diff_opts.pathspec); return porigin; } @@ -460,23 +624,19 @@ static struct origin *find_rename(struct scoreboard *sb, struct origin *porigin = NULL; struct diff_options diff_opts; int i; - const char *paths[2]; diff_setup(&diff_opts); DIFF_OPT_SET(&diff_opts, RECURSIVE); diff_opts.detect_rename = DIFF_DETECT_RENAME; diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; diff_opts.single_follow = origin->path; - paths[0] = NULL; - diff_tree_setup_paths(paths, &diff_opts); - if (diff_setup_done(&diff_opts) < 0) - die("diff-setup"); + diff_setup_done(&diff_opts); - if (is_null_sha1(origin->commit->object.sha1)) - do_diff_cache(parent->tree->object.sha1, &diff_opts); + if (is_null_oid(&origin->commit->object.oid)) + do_diff_cache(parent->tree->object.oid.hash, &diff_opts); else - diff_tree_sha1(parent->tree->object.sha1, - origin->commit->tree->object.sha1, + diff_tree_sha1(parent->tree->object.oid.hash, + origin->commit->tree->object.oid.hash, "", &diff_opts); diffcore_std(&diff_opts); @@ -486,66 +646,53 @@ static struct origin *find_rename(struct scoreboard *sb, !strcmp(p->two->path, origin->path)) { porigin = get_origin(sb, parent, p->one->path); hashcpy(porigin->blob_sha1, p->one->sha1); + porigin->mode = p->one->mode; break; } } diff_flush(&diff_opts); - diff_tree_release_paths(&diff_opts); + free_pathspec(&diff_opts.pathspec); return porigin; } /* - * Link in a new blame entry to the scoreboard. Entries that cover the - * same line range have been removed from the scoreboard previously. + * Append a new blame entry to a given output queue. */ -static void add_blame_entry(struct scoreboard *sb, struct blame_entry *e) +static void add_blame_entry(struct blame_entry ***queue, struct blame_entry *e) { - struct blame_entry *ent, *prev = NULL; - origin_incref(e->suspect); - for (ent = sb->ent; ent && ent->lno < e->lno; ent = ent->next) - prev = ent; - - /* prev, if not NULL, is the last one that is below e */ - e->prev = prev; - if (prev) { - e->next = prev->next; - prev->next = e; - } - else { - e->next = sb->ent; - sb->ent = e; - } - if (e->next) - e->next->prev = e; + e->next = **queue; + **queue = e; + *queue = &e->next; } /* * src typically is on-stack; we want to copy the information in it to - * a malloced blame_entry that is already on the linked list of the - * scoreboard. The origin of dst loses a refcnt while the origin of src - * gains one. + * a malloced blame_entry that gets added to the given queue. The + * origin of dst loses a refcnt. */ -static void dup_entry(struct blame_entry *dst, struct blame_entry *src) +static void dup_entry(struct blame_entry ***queue, + struct blame_entry *dst, struct blame_entry *src) { - struct blame_entry *p, *n; - - p = dst->prev; - n = dst->next; origin_incref(src->suspect); origin_decref(dst->suspect); memcpy(dst, src, sizeof(*src)); - dst->prev = p; - dst->next = n; - dst->score = 0; + dst->next = **queue; + **queue = dst; + *queue = &dst->next; } -static const char *nth_line(struct scoreboard *sb, int lno) +static const char *nth_line(struct scoreboard *sb, long lno) { return sb->final_buf + sb->lineno[lno]; } +static const char *nth_line_cb(void *data, long lno) +{ + return nth_line((struct scoreboard *)data, lno); +} + /* * It is known that lines between tlno to same came from parent, and e * has an overlap with that range. it also is known that parent's @@ -605,10 +752,11 @@ static void split_overlap(struct blame_entry *split, /* * split_overlap() divided an existing blame e into up to three parts - * in split. Adjust the linked list of blames in the scoreboard to + * in split. Any assigned blame is moved to queue to * reflect the split. */ -static void split_blame(struct scoreboard *sb, +static void split_blame(struct blame_entry ***blamed, + struct blame_entry ***unblamed, struct blame_entry *split, struct blame_entry *e) { @@ -616,61 +764,39 @@ static void split_blame(struct scoreboard *sb, if (split[0].suspect && split[2].suspect) { /* The first part (reuse storage for the existing entry e) */ - dup_entry(e, &split[0]); + dup_entry(unblamed, e, &split[0]); /* The last part -- me */ new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); - add_blame_entry(sb, new_entry); + add_blame_entry(unblamed, new_entry); /* ... and the middle part -- parent */ new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); - add_blame_entry(sb, new_entry); + add_blame_entry(blamed, new_entry); } else if (!split[0].suspect && !split[2].suspect) /* * The parent covers the entire area; reuse storage for * e and replace it with the parent. */ - dup_entry(e, &split[1]); + dup_entry(blamed, e, &split[1]); else if (split[0].suspect) { /* me and then parent */ - dup_entry(e, &split[0]); + dup_entry(unblamed, e, &split[0]); new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[1]), sizeof(struct blame_entry)); - add_blame_entry(sb, new_entry); + add_blame_entry(blamed, new_entry); } else { /* parent and then me */ - dup_entry(e, &split[1]); + dup_entry(blamed, e, &split[1]); new_entry = xmalloc(sizeof(*new_entry)); memcpy(new_entry, &(split[2]), sizeof(struct blame_entry)); - add_blame_entry(sb, new_entry); - } - - if (DEBUG) { /* sanity */ - struct blame_entry *ent; - int lno = sb->ent->lno, corrupt = 0; - - for (ent = sb->ent; ent; ent = ent->next) { - if (lno != ent->lno) - corrupt = 1; - if (ent->s_lno < 0) - corrupt = 1; - lno += ent->num_lines; - } - if (corrupt) { - lno = sb->ent->lno; - for (ent = sb->ent; ent; ent = ent->next) { - printf("L %8d l %8d n %8d\n", - lno, ent->lno, ent->num_lines); - lno = ent->lno + ent->num_lines; - } - die("oops"); - } + add_blame_entry(unblamed, new_entry); } } @@ -687,73 +813,147 @@ static void decref_split(struct blame_entry *split) } /* - * Helper for blame_chunk(). blame_entry e is known to overlap with - * the patch hunk; split it and pass blame to the parent. + * reverse_blame reverses the list given in head, appending tail. + * That allows us to build lists in reverse order, then reverse them + * afterwards. This can be faster than building the list in proper + * order right away. The reason is that building in proper order + * requires writing a link in the _previous_ element, while building + * in reverse order just requires placing the list head into the + * _current_ element. */ -static void blame_overlap(struct scoreboard *sb, struct blame_entry *e, - int tlno, int plno, int same, - struct origin *parent) -{ - struct blame_entry split[3]; - - split_overlap(split, e, tlno, plno, same, parent); - if (split[1].suspect) - split_blame(sb, split, e); - decref_split(split); -} -/* - * Find the line number of the last line the target is suspected for. - */ -static int find_last_in_target(struct scoreboard *sb, struct origin *target) +static struct blame_entry *reverse_blame(struct blame_entry *head, + struct blame_entry *tail) { - struct blame_entry *e; - int last_in_target = -1; - - for (e = sb->ent; e; e = e->next) { - if (e->guilty || !same_suspect(e->suspect, target)) - continue; - if (last_in_target < e->s_lno + e->num_lines) - last_in_target = e->s_lno + e->num_lines; + while (head) { + struct blame_entry *next = head->next; + head->next = tail; + tail = head; + head = next; } - return last_in_target; + return tail; } /* * Process one hunk from the patch between the current suspect for - * blame_entry e and its parent. Find and split the overlap, and - * pass blame to the overlapping part to the parent. + * blame_entry e and its parent. This first blames any unfinished + * entries before the chunk (which is where target and parent start + * differing) on the parent, and then splits blame entries at the + * start and at the end of the difference region. Since use of -M and + * -C options may lead to overlapping/duplicate source line number + * ranges, all we can rely on from sorting/merging is the order of the + * first suspect line number. */ -static void blame_chunk(struct scoreboard *sb, - int tlno, int plno, int same, - struct origin *target, struct origin *parent) +static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq, + int tlno, int offset, int same, + struct origin *parent) { - struct blame_entry *e; + struct blame_entry *e = **srcq; + struct blame_entry *samep = NULL, *diffp = NULL; - for (e = sb->ent; e; e = e->next) { - if (e->guilty || !same_suspect(e->suspect, target)) - continue; - if (same <= e->s_lno) - continue; - if (tlno < e->s_lno + e->num_lines) - blame_overlap(sb, e, tlno, plno, same, parent); + while (e && e->s_lno < tlno) { + struct blame_entry *next = e->next; + /* + * current record starts before differing portion. If + * it reaches into it, we need to split it up and + * examine the second part separately. + */ + if (e->s_lno + e->num_lines > tlno) { + /* Move second half to a new record */ + int len = tlno - e->s_lno; + struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry)); + n->suspect = e->suspect; + n->lno = e->lno + len; + n->s_lno = e->s_lno + len; + n->num_lines = e->num_lines - len; + e->num_lines = len; + e->score = 0; + /* Push new record to diffp */ + n->next = diffp; + diffp = n; + } else + origin_decref(e->suspect); + /* Pass blame for everything before the differing + * chunk to the parent */ + e->suspect = origin_incref(parent); + e->s_lno += offset; + e->next = samep; + samep = e; + e = next; + } + /* + * As we don't know how much of a common stretch after this + * diff will occur, the currently blamed parts are all that we + * can assign to the parent for now. + */ + + if (samep) { + **dstq = reverse_blame(samep, **dstq); + *dstq = &samep->next; + } + /* + * Prepend the split off portions: everything after e starts + * after the blameable portion. + */ + e = reverse_blame(diffp, e); + + /* + * Now retain records on the target while parts are different + * from the parent. + */ + samep = NULL; + diffp = NULL; + while (e && e->s_lno < same) { + struct blame_entry *next = e->next; + + /* + * If current record extends into sameness, need to split. + */ + if (e->s_lno + e->num_lines > same) { + /* + * Move second half to a new record to be + * processed by later chunks + */ + int len = same - e->s_lno; + struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry)); + n->suspect = origin_incref(e->suspect); + n->lno = e->lno + len; + n->s_lno = e->s_lno + len; + n->num_lines = e->num_lines - len; + e->num_lines = len; + e->score = 0; + /* Push new record to samep */ + n->next = samep; + samep = n; + } + e->next = diffp; + diffp = e; + e = next; } + **srcq = reverse_blame(diffp, reverse_blame(samep, e)); + /* Move across elements that are in the unblamable portion */ + if (diffp) + *srcq = &diffp->next; } struct blame_chunk_cb_data { - struct scoreboard *sb; - struct origin *target; struct origin *parent; - long plno; - long tlno; + long offset; + struct blame_entry **dstq; + struct blame_entry **srcq; }; -static void blame_chunk_cb(void *data, long same, long p_next, long t_next) +/* diff chunks are from parent to target */ +static int blame_chunk_cb(long start_a, long count_a, + long start_b, long count_b, void *data) { struct blame_chunk_cb_data *d = data; - blame_chunk(d->sb, d->tlno, d->plno, same, d->target, d->parent); - d->plno = p_next; - d->tlno = t_next; + if (start_a - start_b != d->offset) + die("internal error in blame::blame_chunk_cb"); + blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b, + start_b + count_b, d->parent); + d->offset = start_a + count_a - (start_b + count_b); + return 0; } /* @@ -761,34 +961,35 @@ static void blame_chunk_cb(void *data, long same, long p_next, long t_next) * for the lines it is suspected to its parent. Run diff to find * which lines came from parent and pass blame for them. */ -static int pass_blame_to_parent(struct scoreboard *sb, - struct origin *target, - struct origin *parent) +static void pass_blame_to_parent(struct scoreboard *sb, + struct origin *target, + struct origin *parent) { - int last_in_target; mmfile_t file_p, file_o; struct blame_chunk_cb_data d; - xpparam_t xpp; - xdemitconf_t xecfg; - memset(&d, 0, sizeof(d)); - d.sb = sb; d.target = target; d.parent = parent; - last_in_target = find_last_in_target(sb, target); - if (last_in_target < 0) - return 1; /* nothing remains for this target */ + struct blame_entry *newdest = NULL; + + if (!target->suspects) + return; /* nothing remains for this target */ + + d.parent = parent; + d.offset = 0; + d.dstq = &newdest; d.srcq = &target->suspects; fill_origin_blob(&sb->revs->diffopt, parent, &file_p); fill_origin_blob(&sb->revs->diffopt, target, &file_o); num_get_patch++; - memset(&xpp, 0, sizeof(xpp)); - xpp.flags = xdl_opts; - memset(&xecfg, 0, sizeof(xecfg)); - xecfg.ctxlen = 0; - xdi_diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, &xpp, &xecfg); - /* The rest (i.e. anything after tlno) are the same as the parent */ - blame_chunk(sb, d.tlno, d.plno, last_in_target, target, parent); + if (diff_hunks(&file_p, &file_o, 0, blame_chunk_cb, &d)) + die("unable to generate diff (%s -> %s)", + oid_to_hex(&parent->commit->object.oid), + oid_to_hex(&target->commit->object.oid)); + /* The rest are the same as the parent */ + blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent); + *d.dstq = NULL; + queue_blames(sb, parent, newdest); - return 0; + return; } /* @@ -888,12 +1089,15 @@ struct handle_split_cb_data { long tlno; }; -static void handle_split_cb(void *data, long same, long p_next, long t_next) +static int handle_split_cb(long start_a, long count_a, + long start_b, long count_b, void *data) { struct handle_split_cb_data *d = data; - handle_split(d->sb, d->ent, d->tlno, d->plno, same, d->parent, d->split); - d->plno = p_next; - d->tlno = t_next; + handle_split(d->sb, d->ent, d->tlno, d->plno, start_b, d->parent, + d->split); + d->plno = start_a + count_a; + d->tlno = start_b + count_b; + return 0; } /* @@ -908,11 +1112,9 @@ static void find_copy_in_blob(struct scoreboard *sb, mmfile_t *file_p) { const char *cp; - int cnt; mmfile_t file_o; struct handle_split_cb_data d; - xpparam_t xpp; - xdemitconf_t xecfg; + memset(&d, 0, sizeof(d)); d.sb = sb; d.ent = ent; d.parent = parent; d.split = split; /* @@ -920,65 +1122,94 @@ static void find_copy_in_blob(struct scoreboard *sb, */ cp = nth_line(sb, ent->lno); file_o.ptr = (char *) cp; - cnt = ent->num_lines; - - while (cnt && cp < sb->final_buf + sb->final_buf_size) { - if (*cp++ == '\n') - cnt--; - } - file_o.size = cp - file_o.ptr; + file_o.size = nth_line(sb, ent->lno + ent->num_lines) - cp; /* * file_o is a part of final image we are annotating. * file_p partially may match that image. */ - memset(&xpp, 0, sizeof(xpp)); - xpp.flags = xdl_opts; - memset(&xecfg, 0, sizeof(xecfg)); - xecfg.ctxlen = 1; memset(split, 0, sizeof(struct blame_entry [3])); - xdi_diff_hunks(file_p, &file_o, handle_split_cb, &d, &xpp, &xecfg); + if (diff_hunks(file_p, &file_o, 1, handle_split_cb, &d)) + die("unable to generate diff (%s)", + oid_to_hex(&parent->commit->object.oid)); /* remainder, if any, all match the preimage */ handle_split(sb, ent, d.tlno, d.plno, ent->num_lines, parent, split); } +/* Move all blame entries from list *source that have a score smaller + * than score_min to the front of list *small. + * Returns a pointer to the link pointing to the old head of the small list. + */ + +static struct blame_entry **filter_small(struct scoreboard *sb, + struct blame_entry **small, + struct blame_entry **source, + unsigned score_min) +{ + struct blame_entry *p = *source; + struct blame_entry *oldsmall = *small; + while (p) { + if (ent_score(sb, p) <= score_min) { + *small = p; + small = &p->next; + p = *small; + } else { + *source = p; + source = &p->next; + p = *source; + } + } + *small = oldsmall; + *source = NULL; + return small; +} + /* * See if lines currently target is suspected for can be attributed to * parent. */ -static int find_move_in_parent(struct scoreboard *sb, - struct origin *target, - struct origin *parent) +static void find_move_in_parent(struct scoreboard *sb, + struct blame_entry ***blamed, + struct blame_entry **toosmall, + struct origin *target, + struct origin *parent) { - int last_in_target, made_progress; struct blame_entry *e, split[3]; + struct blame_entry *unblamed = target->suspects; + struct blame_entry *leftover = NULL; mmfile_t file_p; - last_in_target = find_last_in_target(sb, target); - if (last_in_target < 0) - return 1; /* nothing remains for this target */ + if (!unblamed) + return; /* nothing remains for this target */ fill_origin_blob(&sb->revs->diffopt, parent, &file_p); if (!file_p.ptr) - return 0; + return; - made_progress = 1; - while (made_progress) { - made_progress = 0; - for (e = sb->ent; e; e = e->next) { - if (e->guilty || !same_suspect(e->suspect, target) || - ent_score(sb, e) < blame_move_score) - continue; + /* At each iteration, unblamed has a NULL-terminated list of + * entries that have not yet been tested for blame. leftover + * contains the reversed list of entries that have been tested + * without being assignable to the parent. + */ + do { + struct blame_entry **unblamedtail = &unblamed; + struct blame_entry *next; + for (e = unblamed; e; e = next) { + next = e->next; find_copy_in_blob(sb, e, parent, split, &file_p); if (split[1].suspect && blame_move_score < ent_score(sb, &split[1])) { - split_blame(sb, split, e); - made_progress = 1; + split_blame(blamed, &unblamedtail, split, e); + } else { + e->next = leftover; + leftover = e; } decref_split(split); } - } - return 0; + *unblamedtail = NULL; + toosmall = filter_small(sb, toosmall, &unblamed, blame_move_score); + } while (unblamed); + target->suspects = reverse_blame(leftover, NULL); } struct blame_list { @@ -990,72 +1221,52 @@ struct blame_list { * Count the number of entries the target is suspected for, * and prepare a list of entry and the best split. */ -static struct blame_list *setup_blame_list(struct scoreboard *sb, - struct origin *target, - int min_score, +static struct blame_list *setup_blame_list(struct blame_entry *unblamed, int *num_ents_p) { struct blame_entry *e; int num_ents, i; struct blame_list *blame_list = NULL; - for (e = sb->ent, num_ents = 0; e; e = e->next) - if (!e->scanned && !e->guilty && - same_suspect(e->suspect, target) && - min_score < ent_score(sb, e)) - num_ents++; + for (e = unblamed, num_ents = 0; e; e = e->next) + num_ents++; if (num_ents) { blame_list = xcalloc(num_ents, sizeof(struct blame_list)); - for (e = sb->ent, i = 0; e; e = e->next) - if (!e->scanned && !e->guilty && - same_suspect(e->suspect, target) && - min_score < ent_score(sb, e)) - blame_list[i++].ent = e; + for (e = unblamed, i = 0; e; e = e->next) + blame_list[i++].ent = e; } *num_ents_p = num_ents; return blame_list; } /* - * Reset the scanned status on all entries. - */ -static void reset_scanned_flag(struct scoreboard *sb) -{ - struct blame_entry *e; - for (e = sb->ent; e; e = e->next) - e->scanned = 0; -} - -/* * For lines target is suspected for, see if we can find code movement * across file boundary from the parent commit. porigin is the path * in the parent we already tried. */ -static int find_copy_in_parent(struct scoreboard *sb, - struct origin *target, - struct commit *parent, - struct origin *porigin, - int opt) +static void find_copy_in_parent(struct scoreboard *sb, + struct blame_entry ***blamed, + struct blame_entry **toosmall, + struct origin *target, + struct commit *parent, + struct origin *porigin, + int opt) { struct diff_options diff_opts; - const char *paths[1]; int i, j; - int retval; struct blame_list *blame_list; int num_ents; + struct blame_entry *unblamed = target->suspects; + struct blame_entry *leftover = NULL; - blame_list = setup_blame_list(sb, target, blame_copy_score, &num_ents); - if (!blame_list) - return 1; /* nothing remains for this target */ + if (!unblamed) + return; /* nothing remains for this target */ diff_setup(&diff_opts); DIFF_OPT_SET(&diff_opts, RECURSIVE); diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; - paths[0] = NULL; - diff_tree_setup_paths(paths, &diff_opts); - if (diff_setup_done(&diff_opts) < 0) - die("diff-setup"); + diff_setup_done(&diff_opts); /* Try "find copies harder" on new path if requested; * we do not want to use diffcore_rename() actually to @@ -1069,19 +1280,19 @@ static int find_copy_in_parent(struct scoreboard *sb, && (!porigin || strcmp(target->path, porigin->path)))) DIFF_OPT_SET(&diff_opts, FIND_COPIES_HARDER); - if (is_null_sha1(target->commit->object.sha1)) - do_diff_cache(parent->tree->object.sha1, &diff_opts); + if (is_null_oid(&target->commit->object.oid)) + do_diff_cache(parent->tree->object.oid.hash, &diff_opts); else - diff_tree_sha1(parent->tree->object.sha1, - target->commit->tree->object.sha1, + diff_tree_sha1(parent->tree->object.oid.hash, + target->commit->tree->object.oid.hash, "", &diff_opts); if (!DIFF_OPT_TST(&diff_opts, FIND_COPIES_HARDER)) diffcore_std(&diff_opts); - retval = 0; - while (1) { - int made_progress = 0; + do { + struct blame_entry **unblamedtail = &unblamed; + blame_list = setup_blame_list(unblamed, &num_ents); for (i = 0; i < diff_queued_diff.nr; i++) { struct diff_filepair *p = diff_queued_diff.queue[i]; @@ -1099,6 +1310,7 @@ static int find_copy_in_parent(struct scoreboard *sb, norigin = get_origin(sb, parent, p->one->path); hashcpy(norigin->blob_sha1, p->one->sha1); + norigin->mode = p->one->mode; fill_origin_blob(&sb->revs->diffopt, norigin, &file_p); if (!file_p.ptr) continue; @@ -1117,27 +1329,21 @@ static int find_copy_in_parent(struct scoreboard *sb, struct blame_entry *split = blame_list[j].split; if (split[1].suspect && blame_copy_score < ent_score(sb, &split[1])) { - split_blame(sb, split, blame_list[j].ent); - made_progress = 1; + split_blame(blamed, &unblamedtail, split, + blame_list[j].ent); + } else { + blame_list[j].ent->next = leftover; + leftover = blame_list[j].ent; } - else - blame_list[j].ent->scanned = 1; decref_split(split); } free(blame_list); - - if (!made_progress) - break; - blame_list = setup_blame_list(sb, target, blame_copy_score, &num_ents); - if (!blame_list) { - retval = 1; - break; - } - } - reset_scanned_flag(sb); + *unblamedtail = NULL; + toosmall = filter_small(sb, toosmall, &unblamed, blame_copy_score); + } while (unblamed); + target->suspects = reverse_blame(leftover, NULL); diff_flush(&diff_opts); - diff_tree_release_paths(&diff_opts); - return retval; + free_pathspec(&diff_opts.pathspec); } /* @@ -1147,20 +1353,21 @@ static int find_copy_in_parent(struct scoreboard *sb, static void pass_whole_blame(struct scoreboard *sb, struct origin *origin, struct origin *porigin) { - struct blame_entry *e; + struct blame_entry *e, *suspects; if (!porigin->file.ptr && origin->file.ptr) { /* Steal its file */ porigin->file = origin->file; origin->file.ptr = NULL; } - for (e = sb->ent; e; e = e->next) { - if (!same_suspect(e->suspect, origin)) - continue; + suspects = origin->suspects; + origin->suspects = NULL; + for (e = suspects; e; e = e->next) { origin_incref(porigin); origin_decref(e->suspect); e->suspect = porigin; } + queue_blames(sb, porigin, suspects); } /* @@ -1170,18 +1377,43 @@ static void pass_whole_blame(struct scoreboard *sb, */ static struct commit_list *first_scapegoat(struct rev_info *revs, struct commit *commit) { - if (!reverse) + if (!reverse) { + if (revs->first_parent_only && + commit->parents && + commit->parents->next) { + free_commit_list(commit->parents->next); + commit->parents->next = NULL; + } return commit->parents; + } return lookup_decoration(&revs->children, &commit->object); } static int num_scapegoats(struct rev_info *revs, struct commit *commit) { - int cnt; struct commit_list *l = first_scapegoat(revs, commit); - for (cnt = 0; l; l = l->next) - cnt++; - return cnt; + return commit_list_count(l); +} + +/* Distribute collected unsorted blames to the respected sorted lists + * in the various origins. + */ +static void distribute_blame(struct scoreboard *sb, struct blame_entry *blamed) +{ + blamed = blame_sort(blamed, compare_blame_suspect); + while (blamed) + { + struct origin *porigin = blamed->suspect; + struct blame_entry *suspects = NULL; + do { + struct blame_entry *next = blamed->next; + blamed->next = suspects; + suspects = blamed; + blamed = next; + } while (blamed && blamed->suspect == porigin); + suspects = reverse_blame(suspects, NULL); + queue_blames(sb, porigin, suspects); + } } #define MAXSG 16 @@ -1194,6 +1426,8 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) struct commit_list *sg; struct origin *sg_buf[MAXSG]; struct origin *porigin, **sg_origin = sg_buf; + struct blame_entry *toosmall = NULL; + struct blame_entry *blames, **blametail = &blames; num_sg = num_scapegoats(revs, commit); if (!num_sg) @@ -1207,7 +1441,7 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) * The first pass looks for unrenamed path to optimize for * common cases, then we look for renames in the second pass. */ - for (pass = 0; pass < 2; pass++) { + for (pass = 0; pass < 2 - no_whole_file_rename; pass++) { struct origin *(*find)(struct scoreboard *, struct commit *, struct origin *); find = pass ? find_rename : find_origin; @@ -1255,38 +1489,71 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) origin_incref(porigin); origin->previous = porigin; } - if (pass_blame_to_parent(sb, origin, porigin)) + pass_blame_to_parent(sb, origin, porigin); + if (!origin->suspects) goto finish; } /* * Optionally find moves in parents' files. */ - if (opt & PICKAXE_BLAME_MOVE) - for (i = 0, sg = first_scapegoat(revs, commit); - i < num_sg && sg; - sg = sg->next, i++) { - struct origin *porigin = sg_origin[i]; - if (!porigin) - continue; - if (find_move_in_parent(sb, origin, porigin)) - goto finish; + if (opt & PICKAXE_BLAME_MOVE) { + filter_small(sb, &toosmall, &origin->suspects, blame_move_score); + if (origin->suspects) { + for (i = 0, sg = first_scapegoat(revs, commit); + i < num_sg && sg; + sg = sg->next, i++) { + struct origin *porigin = sg_origin[i]; + if (!porigin) + continue; + find_move_in_parent(sb, &blametail, &toosmall, origin, porigin); + if (!origin->suspects) + break; + } } + } /* * Optionally find copies from parents' files. */ - if (opt & PICKAXE_BLAME_COPY) + if (opt & PICKAXE_BLAME_COPY) { + if (blame_copy_score > blame_move_score) + filter_small(sb, &toosmall, &origin->suspects, blame_copy_score); + else if (blame_copy_score < blame_move_score) { + origin->suspects = blame_merge(origin->suspects, toosmall); + toosmall = NULL; + filter_small(sb, &toosmall, &origin->suspects, blame_copy_score); + } + if (!origin->suspects) + goto finish; + for (i = 0, sg = first_scapegoat(revs, commit); i < num_sg && sg; sg = sg->next, i++) { struct origin *porigin = sg_origin[i]; - if (find_copy_in_parent(sb, origin, sg->item, - porigin, opt)) + find_copy_in_parent(sb, &blametail, &toosmall, + origin, sg->item, porigin, opt); + if (!origin->suspects) goto finish; } + } - finish: +finish: + *blametail = NULL; + distribute_blame(sb, blames); + /* + * prepend toosmall to origin->suspects + * + * There is no point in sorting: this ends up on a big + * unsorted list in the caller anyway. + */ + if (toosmall) { + struct blame_entry **tail = &toosmall; + while (*tail) + tail = &(*tail)->next; + *tail = origin->suspects; + origin->suspects = toosmall; + } for (i = 0; i < num_sg; i++) { if (sg_origin[i]) { drop_origin_blob(sg_origin[i]); @@ -1301,32 +1568,32 @@ static void pass_blame(struct scoreboard *sb, struct origin *origin, int opt) /* * Information on commits, used for output. */ -struct commit_info -{ - const char *author; - const char *author_mail; +struct commit_info { + struct strbuf author; + struct strbuf author_mail; unsigned long author_time; - const char *author_tz; + struct strbuf author_tz; /* filled only when asked for details */ - const char *committer; - const char *committer_mail; + struct strbuf committer; + struct strbuf committer_mail; unsigned long committer_time; - const char *committer_tz; + struct strbuf committer_tz; - const char *summary; + struct strbuf summary; }; /* * Parse author/committer line in the commit object buffer */ static void get_ac_line(const char *inbuf, const char *what, - int person_len, char *person, - int mail_len, char *mail, - unsigned long *time, const char **tz) + struct strbuf *name, struct strbuf *mail, + unsigned long *time, struct strbuf *tz) { - int len, tzlen, maillen; - char *tmp, *endp, *timepos, *mailpos; + struct ident_split ident; + size_t len, maillen, namelen; + char *tmp, *endp; + const char *namebuf, *mailbuf; tmp = strstr(inbuf, what); if (!tmp) @@ -1337,69 +1604,66 @@ static void get_ac_line(const char *inbuf, const char *what, len = strlen(tmp); else len = endp - tmp; - if (person_len <= len) { + + if (split_ident_line(&ident, tmp, len)) { error_out: /* Ugh */ - *tz = "(unknown)"; - strcpy(person, *tz); - strcpy(mail, *tz); + tmp = "(unknown)"; + strbuf_addstr(name, tmp); + strbuf_addstr(mail, tmp); + strbuf_addstr(tz, tmp); *time = 0; return; } - memcpy(person, tmp, len); - tmp = person; - tmp += len; - *tmp = 0; - while (person < tmp && *tmp != ' ') - tmp--; - if (tmp <= person) - goto error_out; - *tz = tmp+1; - tzlen = (person+len)-(tmp+1); - - *tmp = 0; - while (person < tmp && *tmp != ' ') - tmp--; - if (tmp <= person) - goto error_out; - *time = strtoul(tmp, NULL, 10); - timepos = tmp; + namelen = ident.name_end - ident.name_begin; + namebuf = ident.name_begin; - *tmp = 0; - while (person < tmp && *tmp != ' ') - tmp--; - if (tmp <= person) - return; - mailpos = tmp + 1; - *tmp = 0; - maillen = timepos - tmp; - memcpy(mail, mailpos, maillen); + maillen = ident.mail_end - ident.mail_begin; + mailbuf = ident.mail_begin; - if (!mailmap.nr) - return; + if (ident.date_begin && ident.date_end) + *time = strtoul(ident.date_begin, NULL, 10); + else + *time = 0; - /* - * mailmap expansion may make the name longer. - * make room by pushing stuff down. - */ - tmp = person + person_len - (tzlen + 1); - memmove(tmp, *tz, tzlen); - tmp[tzlen] = 0; - *tz = tmp; + if (ident.tz_begin && ident.tz_end) + strbuf_add(tz, ident.tz_begin, ident.tz_end - ident.tz_begin); + else + strbuf_addstr(tz, "(unknown)"); /* * Now, convert both name and e-mail using mailmap */ - if (map_user(&mailmap, mail+1, mail_len-1, person, tmp-person-1)) { - /* Add a trailing '>' to email, since map_user returns plain emails - Note: It already has '<', since we replace from mail+1 */ - mailpos = memchr(mail, '\0', mail_len); - if (mailpos && mailpos-mail < mail_len - 1) { - *mailpos = '>'; - *(mailpos+1) = '\0'; - } - } + map_user(&mailmap, &mailbuf, &maillen, + &namebuf, &namelen); + + strbuf_addf(mail, "<%.*s>", (int)maillen, mailbuf); + strbuf_add(name, namebuf, namelen); +} + +static void commit_info_init(struct commit_info *ci) +{ + + strbuf_init(&ci->author, 0); + strbuf_init(&ci->author_mail, 0); + strbuf_init(&ci->author_tz, 0); + strbuf_init(&ci->committer, 0); + strbuf_init(&ci->committer_mail, 0); + strbuf_init(&ci->committer_tz, 0); + strbuf_init(&ci->summary, 0); +} + +static void commit_info_destroy(struct commit_info *ci) +{ + + strbuf_release(&ci->author); + strbuf_release(&ci->author_mail); + strbuf_release(&ci->author_tz); + strbuf_release(&ci->committer); + strbuf_release(&ci->committer_mail); + strbuf_release(&ci->committer_tz); + strbuf_release(&ci->summary); } static void get_commit_info(struct commit *commit, @@ -1407,57 +1671,33 @@ static void get_commit_info(struct commit *commit, int detailed) { int len; - const char *subject; - char *reencoded, *message; - static char author_name[1024]; - static char author_mail[1024]; - static char committer_name[1024]; - static char committer_mail[1024]; - static char summary_buf[1024]; + const char *subject, *encoding; + const char *message; - /* - * We've operated without save_commit_buffer, so - * we now need to populate them for output. - */ - if (!commit->buffer) { - enum object_type type; - unsigned long size; - commit->buffer = - read_sha1_file(commit->object.sha1, &type, &size); - if (!commit->buffer) - die("Cannot read commit %s", - sha1_to_hex(commit->object.sha1)); - } - reencoded = reencode_commit_message(commit, NULL); - message = reencoded ? reencoded : commit->buffer; - ret->author = author_name; - ret->author_mail = author_mail; + commit_info_init(ret); + + encoding = get_log_output_encoding(); + message = logmsg_reencode(commit, NULL, encoding); get_ac_line(message, "\nauthor ", - sizeof(author_name), author_name, - sizeof(author_mail), author_mail, + &ret->author, &ret->author_mail, &ret->author_time, &ret->author_tz); if (!detailed) { - free(reencoded); + unuse_commit_buffer(commit, message); return; } - ret->committer = committer_name; - ret->committer_mail = committer_mail; get_ac_line(message, "\ncommitter ", - sizeof(committer_name), committer_name, - sizeof(committer_mail), committer_mail, + &ret->committer, &ret->committer_mail, &ret->committer_time, &ret->committer_tz); - ret->summary = summary_buf; len = find_commit_subject(message, &subject); - if (len && len < sizeof(summary_buf)) { - memcpy(summary_buf, subject, len); - summary_buf[len] = 0; - } else { - sprintf(summary_buf, "(%s)", sha1_to_hex(commit->object.sha1)); - } - free(reencoded); + if (len) + strbuf_add(&ret->summary, subject, len); + else + strbuf_addf(&ret->summary, "(%s)", oid_to_hex(&commit->object.oid)); + + unuse_commit_buffer(commit, message); } /* @@ -1473,86 +1713,96 @@ static void write_filename_info(const char *path) /* * Porcelain/Incremental format wants to show a lot of details per * commit. Instead of repeating this every line, emit it only once, - * the first time each commit appears in the output. + * the first time each commit appears in the output (unless the + * user has specifically asked for us to repeat). */ -static int emit_one_suspect_detail(struct origin *suspect) +static int emit_one_suspect_detail(struct origin *suspect, int repeat) { struct commit_info ci; - if (suspect->commit->object.flags & METAINFO_SHOWN) + if (!repeat && (suspect->commit->object.flags & METAINFO_SHOWN)) return 0; suspect->commit->object.flags |= METAINFO_SHOWN; get_commit_info(suspect->commit, &ci, 1); - printf("author %s\n", ci.author); - printf("author-mail %s\n", ci.author_mail); + printf("author %s\n", ci.author.buf); + printf("author-mail %s\n", ci.author_mail.buf); printf("author-time %lu\n", ci.author_time); - printf("author-tz %s\n", ci.author_tz); - printf("committer %s\n", ci.committer); - printf("committer-mail %s\n", ci.committer_mail); + printf("author-tz %s\n", ci.author_tz.buf); + printf("committer %s\n", ci.committer.buf); + printf("committer-mail %s\n", ci.committer_mail.buf); printf("committer-time %lu\n", ci.committer_time); - printf("committer-tz %s\n", ci.committer_tz); - printf("summary %s\n", ci.summary); + printf("committer-tz %s\n", ci.committer_tz.buf); + printf("summary %s\n", ci.summary.buf); if (suspect->commit->object.flags & UNINTERESTING) printf("boundary\n"); if (suspect->previous) { struct origin *prev = suspect->previous; - printf("previous %s ", sha1_to_hex(prev->commit->object.sha1)); + printf("previous %s ", oid_to_hex(&prev->commit->object.oid)); write_name_quoted(prev->path, stdout, '\n'); } + + commit_info_destroy(&ci); + return 1; } /* - * The blame_entry is found to be guilty for the range. Mark it - * as such, and show it in incremental output. + * The blame_entry is found to be guilty for the range. + * Show it in incremental output. */ -static void found_guilty_entry(struct blame_entry *ent) +static void found_guilty_entry(struct blame_entry *ent, + struct progress_info *pi) { - if (ent->guilty) - return; - ent->guilty = 1; if (incremental) { struct origin *suspect = ent->suspect; printf("%s %d %d %d\n", - sha1_to_hex(suspect->commit->object.sha1), + oid_to_hex(&suspect->commit->object.oid), ent->s_lno + 1, ent->lno + 1, ent->num_lines); - emit_one_suspect_detail(suspect); + emit_one_suspect_detail(suspect, 0); write_filename_info(suspect->path); maybe_flush_or_die(stdout, "stdout"); } + pi->blamed_lines += ent->num_lines; + display_progress(pi->progress, pi->blamed_lines); } /* - * The main loop -- while the scoreboard has lines whose true origin - * is still unknown, pick one blame_entry, and allow its current - * suspect to pass blames to its parents. - */ + * The main loop -- while we have blobs with lines whose true origin + * is still unknown, pick one blob, and allow its lines to pass blames + * to its parents. */ static void assign_blame(struct scoreboard *sb, int opt) { struct rev_info *revs = sb->revs; + struct commit *commit = prio_queue_get(&sb->commits); + struct progress_info pi = { NULL, 0 }; - while (1) { + if (show_progress) + pi.progress = start_progress_delay(_("Blaming lines"), + sb->num_lines, 50, 1); + + while (commit) { struct blame_entry *ent; - struct commit *commit; - struct origin *suspect = NULL; + struct origin *suspect = commit->util; /* find one suspect to break down */ - for (ent = sb->ent; !suspect && ent; ent = ent->next) - if (!ent->guilty) - suspect = ent->suspect; - if (!suspect) - return; /* all done */ + while (suspect && !suspect->suspects) + suspect = suspect->next; + + if (!suspect) { + commit = prio_queue_get(&sb->commits); + continue; + } + + assert(commit == suspect->commit); /* * We will use this suspect later in the loop, * so hold onto it in the meantime. */ origin_incref(suspect); - commit = suspect->commit; - if (!commit->object.parsed) - parse_commit(commit); + parse_commit(commit); if (reverse || (!(commit->object.flags & UNINTERESTING) && !(revs->max_age != -1 && commit->date < revs->max_age))) @@ -1567,35 +1817,57 @@ static void assign_blame(struct scoreboard *sb, int opt) commit->object.flags |= UNINTERESTING; /* Take responsibility for the remaining entries */ - for (ent = sb->ent; ent; ent = ent->next) - if (same_suspect(ent->suspect, suspect)) - found_guilty_entry(ent); + ent = suspect->suspects; + if (ent) { + suspect->guilty = 1; + for (;;) { + struct blame_entry *next = ent->next; + found_guilty_entry(ent, &pi); + if (next) { + ent = next; + continue; + } + ent->next = sb->ent; + sb->ent = suspect->suspects; + suspect->suspects = NULL; + break; + } + } origin_decref(suspect); if (DEBUG) /* sanity */ sanity_check_refcnt(sb); } + + stop_progress(&pi.progress); } static const char *format_time(unsigned long time, const char *tz_str, int show_raw_time) { - static char time_buf[128]; - const char *time_str; - int time_len; - int tz; + static struct strbuf time_buf = STRBUF_INIT; + strbuf_reset(&time_buf); if (show_raw_time) { - sprintf(time_buf, "%lu %s", time, tz_str); + strbuf_addf(&time_buf, "%lu %s", time, tz_str); } else { + const char *time_str; + size_t time_width; + int tz; tz = atoi(tz_str); - time_str = show_date(time, tz, blame_date_mode); - time_len = strlen(time_str); - memcpy(time_buf, time_str, time_len); - memset(time_buf + time_len, ' ', blame_date_width - time_len); + time_str = show_date(time, tz, &blame_date_mode); + strbuf_addstr(&time_buf, time_str); + /* + * Add space paddings to time_buf to display a fixed width + * string, and use time_width for display width calibration. + */ + for (time_width = utf8_strwidth(time_str); + time_width < blame_date_width; + time_width++) + strbuf_addch(&time_buf, ' '); } - return time_buf; + return time_buf.buf; } #define OUTPUT_ANNOTATE_COMPAT 001 @@ -1606,32 +1878,43 @@ static const char *format_time(unsigned long time, const char *tz_str, #define OUTPUT_SHOW_NUMBER 040 #define OUTPUT_SHOW_SCORE 0100 #define OUTPUT_NO_AUTHOR 0200 +#define OUTPUT_SHOW_EMAIL 0400 +#define OUTPUT_LINE_PORCELAIN 01000 -static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent) +static void emit_porcelain_details(struct origin *suspect, int repeat) { + if (emit_one_suspect_detail(suspect, repeat) || + (suspect->commit->object.flags & MORE_THAN_ONE_PATH)) + write_filename_info(suspect->path); +} + +static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent, + int opt) +{ + int repeat = opt & OUTPUT_LINE_PORCELAIN; int cnt; const char *cp; struct origin *suspect = ent->suspect; - char hex[41]; + char hex[GIT_SHA1_HEXSZ + 1]; - strcpy(hex, sha1_to_hex(suspect->commit->object.sha1)); - printf("%s%c%d %d %d\n", + sha1_to_hex_r(hex, suspect->commit->object.oid.hash); + printf("%s %d %d %d\n", hex, - ent->guilty ? ' ' : '*', /* purely for debugging */ ent->s_lno + 1, ent->lno + 1, ent->num_lines); - if (emit_one_suspect_detail(suspect) || - (suspect->commit->object.flags & MORE_THAN_ONE_PATH)) - write_filename_info(suspect->path); + emit_porcelain_details(suspect, repeat); cp = nth_line(sb, ent->lno); for (cnt = 0; cnt < ent->num_lines; cnt++) { char ch; - if (cnt) + if (cnt) { printf("%s %d %d\n", hex, ent->s_lno + 1 + cnt, ent->lno + 1 + cnt); + if (repeat) + emit_porcelain_details(suspect, 1); + } putchar('\t'); do { ch = *cp++; @@ -1650,16 +1933,16 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) const char *cp; struct origin *suspect = ent->suspect; struct commit_info ci; - char hex[41]; + char hex[GIT_SHA1_HEXSZ + 1]; int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP); get_commit_info(suspect->commit, &ci, 1); - strcpy(hex, sha1_to_hex(suspect->commit->object.sha1)); + sha1_to_hex_r(hex, suspect->commit->object.oid.hash); cp = nth_line(sb, ent->lno); for (cnt = 0; cnt < ent->num_lines; cnt++) { char ch; - int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : 8; + int length = (opt & OUTPUT_LONG_OBJECT_NAME) ? 40 : abbrev; if (suspect->commit->object.flags & UNINTERESTING) { if (blank_boundary) @@ -1671,12 +1954,17 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) } printf("%.*s", length, hex); - if (opt & OUTPUT_ANNOTATE_COMPAT) - printf("\t(%10s\t%10s\t%d)", ci.author, - format_time(ci.author_time, ci.author_tz, + if (opt & OUTPUT_ANNOTATE_COMPAT) { + const char *name; + if (opt & OUTPUT_SHOW_EMAIL) + name = ci.author_mail.buf; + else + name = ci.author.buf; + printf("\t(%10s\t%10s\t%d)", name, + format_time(ci.author_time, ci.author_tz.buf, show_raw_time), ent->lno + 1 + cnt); - else { + } else { if (opt & OUTPUT_SHOW_SCORE) printf(" %*d %02d", max_score_digits, ent->score, @@ -1689,11 +1977,17 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) ent->s_lno + 1 + cnt); if (!(opt & OUTPUT_NO_AUTHOR)) { - int pad = longest_author - utf8_strwidth(ci.author); + const char *name; + int pad; + if (opt & OUTPUT_SHOW_EMAIL) + name = ci.author_mail.buf; + else + name = ci.author.buf; + pad = longest_author - utf8_strwidth(name); printf(" (%s%*s %10s", - ci.author, pad, "", + name, pad, "", format_time(ci.author_time, - ci.author_tz, + ci.author_tz.buf, show_raw_time)); } printf(" %*d) ", @@ -1708,6 +2002,8 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) if (sb->final_buf_size && cp[-1] != '\n') putchar('\n'); + + commit_info_destroy(&ci); } static void output(struct scoreboard *sb, int option) @@ -1716,30 +2012,35 @@ static void output(struct scoreboard *sb, int option) if (option & OUTPUT_PORCELAIN) { for (ent = sb->ent; ent; ent = ent->next) { - struct blame_entry *oth; - struct origin *suspect = ent->suspect; - struct commit *commit = suspect->commit; + int count = 0; + struct origin *suspect; + struct commit *commit = ent->suspect->commit; if (commit->object.flags & MORE_THAN_ONE_PATH) continue; - for (oth = ent->next; oth; oth = oth->next) { - if ((oth->suspect->commit != commit) || - !strcmp(oth->suspect->path, suspect->path)) - continue; - commit->object.flags |= MORE_THAN_ONE_PATH; - break; + for (suspect = commit->util; suspect; suspect = suspect->next) { + if (suspect->guilty && count++) { + commit->object.flags |= MORE_THAN_ONE_PATH; + break; + } } } } for (ent = sb->ent; ent; ent = ent->next) { if (option & OUTPUT_PORCELAIN) - emit_porcelain(sb, ent); + emit_porcelain(sb, ent, option); else { emit_other(sb, ent, option); } } } +static const char *get_next_line(const char *start, const char *end) +{ + const char *nl = memchr(start, '\n', end - start); + return nl ? nl + 1 : end; +} + /* * To allow quick access to the contents of nth line in the * final image, prepare an index in the scoreboard. @@ -1748,26 +2049,23 @@ static int prepare_lines(struct scoreboard *sb) { const char *buf = sb->final_buf; unsigned long len = sb->final_buf_size; - int num = 0, incomplete = 0, bol = 1; - - if (len && buf[len-1] != '\n') - incomplete++; /* incomplete line at the end */ - while (len--) { - if (bol) { - sb->lineno = xrealloc(sb->lineno, - sizeof(int *) * (num + 1)); - sb->lineno[num] = buf - sb->final_buf; - bol = 0; - } - if (*buf++ == '\n') { - num++; - bol = 1; - } - } - sb->lineno = xrealloc(sb->lineno, - sizeof(int *) * (num + incomplete + 1)); - sb->lineno[num + incomplete] = buf - sb->final_buf; - sb->num_lines = num + incomplete; + const char *end = buf + len; + const char *p; + int *lineno; + int num = 0; + + for (p = buf; p < end; p = get_next_line(p, end)) + num++; + + ALLOC_ARRAY(sb->lineno, num + 1); + lineno = sb->lineno; + + for (p = buf; p < end; p = get_next_line(p, end)) + *lineno++ = p - buf; + + *lineno = len; + + sb->num_lines = num; return sb->num_lines; } @@ -1779,30 +2077,28 @@ static int prepare_lines(struct scoreboard *sb) static int read_ancestry(const char *graft_file) { FILE *fp = fopen(graft_file, "r"); - char buf[1024]; + struct strbuf buf = STRBUF_INIT; if (!fp) return -1; - while (fgets(buf, sizeof(buf), fp)) { + while (!strbuf_getwholeline(&buf, fp, '\n')) { /* The format is just "Commit Parent1 Parent2 ...\n" */ - int len = strlen(buf); - struct commit_graft *graft = read_graft_line(buf, len); + struct commit_graft *graft = read_graft_line(buf.buf, buf.len); if (graft) register_commit_graft(graft, 0); } fclose(fp); + strbuf_release(&buf); return 0; } -/* - * How many columns do we need to show line numbers in decimal? - */ -static int lineno_width(int lines) +static int update_auto_abbrev(int auto_abbrev, struct origin *suspect) { - int i, width; - - for (width = 1, i = 10; i <= lines; width++) - i *= 10; - return width; + const char *uniq = find_unique_abbrev(suspect->commit->object.oid.hash, + auto_abbrev); + int len = strlen(uniq); + if (auto_abbrev < len) + return len; + return auto_abbrev; } /* @@ -1815,23 +2111,31 @@ static void find_alignment(struct scoreboard *sb, int *option) int longest_dst_lines = 0; unsigned largest_score = 0; struct blame_entry *e; + int compute_auto_abbrev = (abbrev < 0); + int auto_abbrev = default_abbrev; for (e = sb->ent; e; e = e->next) { struct origin *suspect = e->suspect; - struct commit_info ci; int num; + if (compute_auto_abbrev) + auto_abbrev = update_auto_abbrev(auto_abbrev, suspect); if (strcmp(suspect->path, sb->path)) *option |= OUTPUT_SHOW_NAME; num = strlen(suspect->path); if (longest_file < num) longest_file = num; if (!(suspect->commit->object.flags & METAINFO_SHOWN)) { + struct commit_info ci; suspect->commit->object.flags |= METAINFO_SHOWN; get_commit_info(suspect->commit, &ci, 1); - num = utf8_strwidth(ci.author); + if (*option & OUTPUT_SHOW_EMAIL) + num = utf8_strwidth(ci.author_mail.buf); + else + num = utf8_strwidth(ci.author.buf); if (longest_author < num) longest_author = num; + commit_info_destroy(&ci); } num = e->s_lno + e->num_lines; if (longest_src_lines < num) @@ -1842,9 +2146,13 @@ static void find_alignment(struct scoreboard *sb, int *option) if (largest_score < ent_score(sb, e)) largest_score = ent_score(sb, e); } - max_orig_digits = lineno_width(longest_src_lines); - max_digits = lineno_width(longest_dst_lines); - max_score_digits = lineno_width(largest_score); + max_orig_digits = decimal_width(longest_src_lines); + max_digits = decimal_width(longest_dst_lines); + max_score_digits = decimal_width(largest_score); + + if (compute_auto_abbrev) + /* one more abbrev length is needed for the boundary commit */ + abbrev = auto_abbrev + 1; } /* @@ -1861,7 +2169,7 @@ static void sanity_check_refcnt(struct scoreboard *sb) if (ent->suspect->refcnt <= 0) { fprintf(stderr, "%s in %s has negative refcnt %d\n", ent->suspect->path, - sha1_to_hex(ent->suspect->commit->object.sha1), + oid_to_hex(&ent->suspect->commit->object.oid), ent->suspect->refcnt); baa = 1; } @@ -1874,16 +2182,6 @@ static void sanity_check_refcnt(struct scoreboard *sb) } } -/* - * Used for the command line parsing; check if the path exists - * in the working tree. - */ -static int has_string_in_work_tree(const char *path) -{ - struct stat st; - return !lstat(path, &st); -} - static unsigned parse_score(const char *arg) { char *end; @@ -1898,103 +2196,6 @@ static const char *add_prefix(const char *prefix, const char *path) return prefix_path(prefix, prefix ? strlen(prefix) : 0, path); } -/* - * Parsing of (comma separated) one item in the -L option - */ -static const char *parse_loc(const char *spec, - struct scoreboard *sb, long lno, - long begin, long *ret) -{ - char *term; - const char *line; - long num; - int reg_error; - regex_t regexp; - regmatch_t match[1]; - - /* Allow "-L <something>,+20" to mean starting at <something> - * for 20 lines, or "-L <something>,-5" for 5 lines ending at - * <something>. - */ - if (1 < begin && (spec[0] == '+' || spec[0] == '-')) { - num = strtol(spec + 1, &term, 10); - if (term != spec + 1) { - if (spec[0] == '-') - num = 0 - num; - if (0 < num) - *ret = begin + num - 2; - else if (!num) - *ret = begin; - else - *ret = begin + num; - return term; - } - return spec; - } - num = strtol(spec, &term, 10); - if (term != spec) { - *ret = num; - return term; - } - if (spec[0] != '/') - return spec; - - /* it could be a regexp of form /.../ */ - for (term = (char *) spec + 1; *term && *term != '/'; term++) { - if (*term == '\\') - term++; - } - if (*term != '/') - return spec; - - /* try [spec+1 .. term-1] as regexp */ - *term = 0; - begin--; /* input is in human terms */ - line = nth_line(sb, begin); - - if (!(reg_error = regcomp(®exp, spec + 1, REG_NEWLINE)) && - !(reg_error = regexec(®exp, line, 1, match, 0))) { - const char *cp = line + match[0].rm_so; - const char *nline; - - while (begin++ < lno) { - nline = nth_line(sb, begin); - if (line <= cp && cp < nline) - break; - line = nline; - } - *ret = begin; - regfree(®exp); - *term++ = '/'; - return term; - } - else { - char errbuf[1024]; - regerror(reg_error, ®exp, errbuf, 1024); - die("-L parameter '%s': %s", spec + 1, errbuf); - } -} - -/* - * Parsing of -L option - */ -static void prepare_blame_range(struct scoreboard *sb, - const char *bottomtop, - long lno, - long *bottom, long *top) -{ - const char *term; - - term = parse_loc(bottomtop, sb, lno, 1, bottom); - if (*term == ',') { - term = parse_loc(term + 1, sb, lno, *bottom + 1, top); - if (*term) - usage(blame_usage); - } - if (*term) - usage(blame_usage); -} - static int git_blame_config(const char *var, const char *value, void *cb) { if (!strcmp(var, "blame.showroot")) { @@ -2005,25 +2206,87 @@ static int git_blame_config(const char *var, const char *value, void *cb) blank_boundary = git_config_bool(var, value); return 0; } + if (!strcmp(var, "blame.showemail")) { + int *output_option = cb; + if (git_config_bool(var, value)) + *output_option |= OUTPUT_SHOW_EMAIL; + else + *output_option &= ~OUTPUT_SHOW_EMAIL; + return 0; + } if (!strcmp(var, "blame.date")) { if (!value) return config_error_nonbool(var); - blame_date_mode = parse_date_format(value); + parse_date_format(value, &blame_date_mode); return 0; } - switch (userdiff_config(var, value)) { - case 0: - break; - case -1: + if (userdiff_config(var, value) < 0) return -1; - default: - return 0; - } return git_default_config(var, value, cb); } +static void verify_working_tree_path(struct commit *work_tree, const char *path) +{ + struct commit_list *parents; + + for (parents = work_tree->parents; parents; parents = parents->next) { + const unsigned char *commit_sha1 = parents->item->object.oid.hash; + unsigned char blob_sha1[20]; + unsigned mode; + + if (!get_tree_entry(commit_sha1, path, blob_sha1, &mode) && + sha1_object_info(blob_sha1, NULL) == OBJ_BLOB) + return; + } + die("no such path '%s' in HEAD", path); +} + +static struct commit_list **append_parent(struct commit_list **tail, const unsigned char *sha1) +{ + struct commit *parent; + + parent = lookup_commit_reference(sha1); + if (!parent) + die("no such commit %s", sha1_to_hex(sha1)); + return &commit_list_insert(parent, tail)->next; +} + +static void append_merge_parents(struct commit_list **tail) +{ + int merge_head; + struct strbuf line = STRBUF_INIT; + + merge_head = open(git_path_merge_head(), O_RDONLY); + if (merge_head < 0) { + if (errno == ENOENT) + return; + die("cannot open '%s' for reading", git_path_merge_head()); + } + + while (!strbuf_getwholeline_fd(&line, merge_head, '\n')) { + unsigned char sha1[20]; + if (line.len < 40 || get_sha1_hex(line.buf, sha1)) + die("unknown line in '%s': %s", git_path_merge_head(), line.buf); + tail = append_parent(tail, sha1); + } + close(merge_head); + strbuf_release(&line); +} + +/* + * This isn't as simple as passing sb->buf and sb->len, because we + * want to transfer ownership of the buffer to the commit (so we + * must use detach). + */ +static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb) +{ + size_t len; + void *buf = strbuf_detach(sb, &len); + set_commit_buffer(c, buf, len); +} + /* * Prepare a dummy commit that represents the work tree (or staged) item. * Note that annotating work tree item never works in the reverse. @@ -2034,6 +2297,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, { struct commit *commit; struct origin *origin; + struct commit_list **parent_tail, *parent; unsigned char head_sha1[20]; struct strbuf buf = STRBUF_INIT; const char *ident; @@ -2041,23 +2305,42 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, int size, len; struct cache_entry *ce; unsigned mode; + struct strbuf msg = STRBUF_INIT; - if (get_sha1("HEAD", head_sha1)) - die("No such ref: HEAD"); - + read_cache(); time(&now); - commit = xcalloc(1, sizeof(*commit)); - commit->parents = xcalloc(1, sizeof(*commit->parents)); - commit->parents->item = lookup_commit_reference(head_sha1); + commit = alloc_commit_node(); commit->object.parsed = 1; commit->date = now; - commit->object.type = OBJ_COMMIT; + parent_tail = &commit->parents; + + if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, head_sha1, NULL)) + die("no such ref: HEAD"); + + parent_tail = append_parent(parent_tail, head_sha1); + append_merge_parents(parent_tail); + verify_working_tree_path(commit, path); origin = make_origin(commit, path); + ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0); + strbuf_addstr(&msg, "tree 0000000000000000000000000000000000000000\n"); + for (parent = commit->parents; parent; parent = parent->next) + strbuf_addf(&msg, "parent %s\n", + oid_to_hex(&parent->item->object.oid)); + strbuf_addf(&msg, + "author %s\n" + "committer %s\n\n" + "Version of %s from %s\n", + ident, ident, path, + (!contents_from ? path : + (!strcmp(contents_from, "-") ? "standard input" : contents_from))); + set_commit_buffer_from_strbuf(commit, &msg); + if (!contents_from || strcmp("-", contents_from)) { struct stat st; const char *read_from; + char *buf_ptr; unsigned long buf_len; if (contents_from) { @@ -2075,8 +2358,8 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, switch (st.st_mode & S_IFMT) { case S_IFREG: if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) && - textconv_object(read_from, null_sha1, &buf.buf, &buf_len)) - buf.len = buf_len; + textconv_object(read_from, mode, null_sha1, 0, &buf_ptr, &buf_len)) + strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1); else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size) die_errno("cannot open or read '%s'", read_from); break; @@ -2090,7 +2373,6 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, } else { /* Reading from stdin */ - contents_from = "standard input"; mode = 0; if (strbuf_read(&buf, 0, 0) < 0) die_errno("failed to read from stdin"); @@ -2099,7 +2381,6 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, origin->file.ptr = buf.buf; origin->file.size = buf.len; pretend_sha1_file(buf.buf, buf.len, OBJ_BLOB, origin->blob_sha1); - commit->util = origin; /* * Read the current index, replace the path entry with @@ -2123,40 +2404,23 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ce = xcalloc(1, size); hashcpy(ce->sha1, origin->blob_sha1); memcpy(ce->name, path, len); - ce->ce_flags = create_ce_flags(len, 0); + ce->ce_flags = create_ce_flags(0); + ce->ce_namelen = len; ce->ce_mode = create_ce_mode(mode); add_cache_entry(ce, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE); - /* - * We are not going to write this out, so this does not matter - * right now, but someday we might optimize diff-index --cached - * with cache-tree information. - */ - cache_tree_invalidate_path(active_cache_tree, path); + cache_tree_invalidate_path(&the_index, path); - commit->buffer = xmalloc(400); - ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0); - snprintf(commit->buffer, 400, - "tree 0000000000000000000000000000000000000000\n" - "parent %s\n" - "author %s\n" - "committer %s\n\n" - "Version of %s from %s\n", - sha1_to_hex(head_sha1), - ident, ident, path, contents_from ? contents_from : path); return commit; } -static const char *prepare_final(struct scoreboard *sb) +static struct commit *find_single_final(struct rev_info *revs, + const char **name_p) { int i; - const char *final_commit_name = NULL; - struct rev_info *revs = sb->revs; + struct commit *found = NULL; + const char *name = NULL; - /* - * There must be one and only one positive commit in the - * revs->pending array. - */ for (i = 0; i < revs->pending.nr; i++) { struct object *obj = revs->pending.objects[i].item; if (obj->flags & UNINTERESTING) @@ -2165,17 +2429,25 @@ static const char *prepare_final(struct scoreboard *sb) obj = deref_tag(obj, NULL, 0); if (obj->type != OBJ_COMMIT) die("Non commit %s?", revs->pending.objects[i].name); - if (sb->final) + if (found) die("More than one commit to dig from %s and %s?", - revs->pending.objects[i].name, - final_commit_name); - sb->final = (struct commit *) obj; - final_commit_name = revs->pending.objects[i].name; + revs->pending.objects[i].name, name); + found = (struct commit *)obj; + name = revs->pending.objects[i].name; } - return final_commit_name; + if (name_p) + *name_p = name; + return found; } -static const char *prepare_initial(struct scoreboard *sb) +static char *prepare_final(struct scoreboard *sb) +{ + const char *name; + sb->final = find_single_final(sb->revs, &name); + return xstrdup_or_null(name); +} + +static char *prepare_initial(struct scoreboard *sb) { int i; const char *final_commit_name = NULL; @@ -2202,7 +2474,7 @@ static const char *prepare_initial(struct scoreboard *sb) } if (!final_commit_name) die("No commit to dig down to?"); - return final_commit_name; + return xstrdup(final_commit_name); } static int blame_copy_callback(const struct option *option, const char *arg, int unset) @@ -2238,68 +2510,68 @@ static int blame_move_callback(const struct option *option, const char *arg, int return 0; } -static int blame_bottomtop_callback(const struct option *option, const char *arg, int unset) -{ - const char **bottomtop = option->value; - if (!arg) - return -1; - if (*bottomtop) - die("More than one '-L n,m' option given"); - *bottomtop = arg; - return 0; -} - int cmd_blame(int argc, const char **argv, const char *prefix) { struct rev_info revs; const char *path; struct scoreboard sb; struct origin *o; - struct blame_entry *ent; - long dashdash_pos, bottom, top, lno; - const char *final_commit_name = NULL; + struct blame_entry *ent = NULL; + long dashdash_pos, lno; + char *final_commit_name = NULL; enum object_type type; + struct commit *final_commit = NULL; - static const char *bottomtop = NULL; + static struct string_list range_list; static int output_option = 0, opt = 0; static int show_stats = 0; static const char *revs_file = NULL; static const char *contents_from = NULL; static const struct option options[] = { - OPT_BOOLEAN(0, "incremental", &incremental, "Show blame entries as we find them, incrementally"), - OPT_BOOLEAN('b', NULL, &blank_boundary, "Show blank SHA-1 for boundary commits (Default: off)"), - OPT_BOOLEAN(0, "root", &show_root, "Do not treat root commits as boundaries (Default: off)"), - OPT_BOOLEAN(0, "show-stats", &show_stats, "Show work cost statistics"), - OPT_BIT(0, "score-debug", &output_option, "Show output score for blame entries", OUTPUT_SHOW_SCORE), - OPT_BIT('f', "show-name", &output_option, "Show original filename (Default: auto)", OUTPUT_SHOW_NAME), - OPT_BIT('n', "show-number", &output_option, "Show original linenumber (Default: off)", OUTPUT_SHOW_NUMBER), - OPT_BIT('p', "porcelain", &output_option, "Show in a format designed for machine consumption", OUTPUT_PORCELAIN), - OPT_BIT('c', NULL, &output_option, "Use the same output mode as git-annotate (Default: off)", OUTPUT_ANNOTATE_COMPAT), - OPT_BIT('t', NULL, &output_option, "Show raw timestamp (Default: off)", OUTPUT_RAW_TIMESTAMP), - OPT_BIT('l', NULL, &output_option, "Show long commit SHA1 (Default: off)", OUTPUT_LONG_OBJECT_NAME), - OPT_BIT('s', NULL, &output_option, "Suppress author name and timestamp (Default: off)", OUTPUT_NO_AUTHOR), - OPT_BIT('w', NULL, &xdl_opts, "Ignore whitespace differences", XDF_IGNORE_WHITESPACE), - OPT_STRING('S', NULL, &revs_file, "file", "Use revisions from <file> instead of calling git-rev-list"), - OPT_STRING(0, "contents", &contents_from, "file", "Use <file>'s contents as the final image"), - { OPTION_CALLBACK, 'C', NULL, &opt, "score", "Find line copies within and across files", PARSE_OPT_OPTARG, blame_copy_callback }, - { OPTION_CALLBACK, 'M', NULL, &opt, "score", "Find line movements within and across files", PARSE_OPT_OPTARG, blame_move_callback }, - OPT_CALLBACK('L', NULL, &bottomtop, "n,m", "Process only line range n,m, counting from 1", blame_bottomtop_callback), + OPT_BOOL(0, "incremental", &incremental, N_("Show blame entries as we find them, incrementally")), + OPT_BOOL('b', NULL, &blank_boundary, N_("Show blank SHA-1 for boundary commits (Default: off)")), + OPT_BOOL(0, "root", &show_root, N_("Do not treat root commits as boundaries (Default: off)")), + OPT_BOOL(0, "show-stats", &show_stats, N_("Show work cost statistics")), + OPT_BOOL(0, "progress", &show_progress, N_("Force progress reporting")), + OPT_BIT(0, "score-debug", &output_option, N_("Show output score for blame entries"), OUTPUT_SHOW_SCORE), + OPT_BIT('f', "show-name", &output_option, N_("Show original filename (Default: auto)"), OUTPUT_SHOW_NAME), + OPT_BIT('n', "show-number", &output_option, N_("Show original linenumber (Default: off)"), OUTPUT_SHOW_NUMBER), + OPT_BIT('p', "porcelain", &output_option, N_("Show in a format designed for machine consumption"), OUTPUT_PORCELAIN), + OPT_BIT(0, "line-porcelain", &output_option, N_("Show porcelain format with per-line commit information"), OUTPUT_PORCELAIN|OUTPUT_LINE_PORCELAIN), + OPT_BIT('c', NULL, &output_option, N_("Use the same output mode as git-annotate (Default: off)"), OUTPUT_ANNOTATE_COMPAT), + OPT_BIT('t', NULL, &output_option, N_("Show raw timestamp (Default: off)"), OUTPUT_RAW_TIMESTAMP), + OPT_BIT('l', NULL, &output_option, N_("Show long commit SHA1 (Default: off)"), OUTPUT_LONG_OBJECT_NAME), + OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR), + OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL), + OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE), + OPT_BIT(0, "minimal", &xdl_opts, N_("Spend extra cycles to find better match"), XDF_NEED_MINIMAL), + OPT_STRING('S', NULL, &revs_file, N_("file"), N_("Use revisions from <file> instead of calling git-rev-list")), + OPT_STRING(0, "contents", &contents_from, N_("file"), N_("Use <file>'s contents as the final image")), + { OPTION_CALLBACK, 'C', NULL, &opt, N_("score"), N_("Find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback }, + { OPTION_CALLBACK, 'M', NULL, &opt, N_("score"), N_("Find line movements within and across files"), PARSE_OPT_OPTARG, blame_move_callback }, + OPT_STRING_LIST('L', NULL, &range_list, N_("n,m"), N_("Process only line range n,m, counting from 1")), + OPT__ABBREV(&abbrev), OPT_END() }; struct parse_opt_ctx_t ctx; int cmd_is_annotate = !strcmp(argv[0], "annotate"); + struct range_set ranges; + unsigned int range_i; + long anchor; - git_config(git_blame_config, NULL); + git_config(git_blame_config, &output_option); init_revisions(&revs, NULL); revs.date_mode = blame_date_mode; DIFF_OPT_SET(&revs.diffopt, ALLOW_TEXTCONV); + DIFF_OPT_SET(&revs.diffopt, FOLLOW_RENAMES); save_commit_buffer = 0; dashdash_pos = 0; + show_progress = -1; - parse_options_start(&ctx, argc, argv, prefix, PARSE_OPT_KEEP_DASHDASH | - PARSE_OPT_KEEP_ARGV0); + parse_options_start(&ctx, argc, argv, prefix, options, + PARSE_OPT_KEEP_DASHDASH | PARSE_OPT_KEEP_ARGV0); for (;;) { switch (parse_options_step(&ctx, options, blame_opt_usage)) { case PARSE_OPT_HELP: @@ -2317,23 +2589,39 @@ int cmd_blame(int argc, const char **argv, const char *prefix) parse_revision_opt(&revs, &ctx, options, blame_opt_usage); } parse_done: + no_whole_file_rename = !DIFF_OPT_TST(&revs.diffopt, FOLLOW_RENAMES); + DIFF_OPT_CLR(&revs.diffopt, FOLLOW_RENAMES); argc = parse_options_end(&ctx); + if (incremental || (output_option & OUTPUT_PORCELAIN)) { + if (show_progress > 0) + die("--progress can't be used with --incremental or porcelain formats"); + show_progress = 0; + } else if (show_progress < 0) + show_progress = isatty(2); + + if (0 < abbrev) + /* one more abbrev length is needed for the boundary commit */ + abbrev++; + if (revs_file && read_ancestry(revs_file)) die_errno("reading graft file '%s' failed", revs_file); if (cmd_is_annotate) { output_option |= OUTPUT_ANNOTATE_COMPAT; - blame_date_mode = DATE_ISO8601; + blame_date_mode.type = DATE_ISO8601; } else { blame_date_mode = revs.date_mode; } /* The maximum width used to show the dates */ - switch (blame_date_mode) { + switch (blame_date_mode.type) { case DATE_RFC2822: blame_date_width = sizeof("Thu, 19 Oct 2006 16:00:04 -0700"); break; + case DATE_ISO8601_STRICT: + blame_date_width = sizeof("2006-10-19T16:00:04-07:00"); + break; case DATE_ISO8601: blame_date_width = sizeof("2006-10-19 16:00:04 -0700"); break; @@ -2344,11 +2632,20 @@ parse_done: blame_date_width = sizeof("2006-10-19"); break; case DATE_RELATIVE: - /* "normal" is used as the fallback for "relative" */ - case DATE_LOCAL: + /* TRANSLATORS: This string is used to tell us the maximum + display width for a relative timestamp in "git blame" + output. For C locale, "4 years, 11 months ago", which + takes 22 places, is the longest among various forms of + relative timestamps, but your language may need more or + fewer display columns. */ + blame_date_width = utf8_strwidth(_("4 years, 11 months ago")) + 1; /* add the null */ + break; case DATE_NORMAL: blame_date_width = sizeof("Thu Oct 19 16:00:04 2006 -0700"); break; + case DATE_STRFTIME: + blame_date_width = strlen(show_date(0, 0, &blame_date_mode)) + 1; /* add the null */ + break; } blame_date_width -= 1; /* strip the null */ @@ -2400,14 +2697,14 @@ parse_done: if (argc < 2) usage_with_options(blame_opt_usage, options); path = add_prefix(prefix, argv[argc - 1]); - if (argc == 3 && !has_string_in_work_tree(path)) { /* (2b) */ + if (argc == 3 && !file_exists(path)) { /* (2b) */ path = add_prefix(prefix, argv[1]); argv[1] = argv[2]; } argv[argc - 1] = "--"; setup_work_tree(); - if (!has_string_in_work_tree(path)) + if (!file_exists(path)) die_errno("cannot stat path '%s'", path); } @@ -2416,12 +2713,18 @@ parse_done: memset(&sb, 0, sizeof(sb)); sb.revs = &revs; - if (!reverse) + if (!reverse) { final_commit_name = prepare_final(&sb); + sb.commits.compare = compare_commits_by_commit_date; + } else if (contents_from) - die("--contents and --children do not blend well."); - else + die("--contents and --reverse do not blend well."); + else { final_commit_name = prepare_initial(&sb); + sb.commits.compare = compare_commits_by_reverse_commit_date; + if (revs.first_parent_only) + revs.children.name = NULL; + } if (!sb.final) { /* @@ -2437,29 +2740,51 @@ parse_done: else if (contents_from) die("Cannot use --contents with final commit object name"); + if (reverse && revs.first_parent_only) { + final_commit = find_single_final(sb.revs, NULL); + if (!final_commit) + die("--reverse and --first-parent together require specified latest commit"); + } + /* * If we have bottom, this will mark the ancestors of the * bottom commits we would reach while traversing as * uninteresting. */ if (prepare_revision_walk(&revs)) - die("revision walk setup failed"); + die(_("revision walk setup failed")); - if (is_null_sha1(sb.final->object.sha1)) { - char *buf; + if (reverse && revs.first_parent_only) { + struct commit *c = final_commit; + + sb.revs->children.name = "children"; + while (c->parents && + oidcmp(&c->object.oid, &sb.final->object.oid)) { + struct commit_list *l = xcalloc(1, sizeof(*l)); + + l->item = c; + if (add_decoration(&sb.revs->children, + &c->parents->item->object, l)) + die("BUG: not unique item in first-parent chain"); + c = c->parents->item; + } + + if (oidcmp(&c->object.oid, &sb.final->object.oid)) + die("--reverse --first-parent together require range along first-parent chain"); + } + + if (is_null_oid(&sb.final->object.oid)) { o = sb.final->util; - buf = xmalloc(o->file.size + 1); - memcpy(buf, o->file.ptr, o->file.size + 1); - sb.final_buf = buf; + sb.final_buf = xmemdupz(o->file.ptr, o->file.size); sb.final_buf_size = o->file.size; } else { o = get_origin(&sb, sb.final, path); - if (fill_blob_sha1(o)) + if (fill_blob_sha1_and_mode(o)) die("no such path %s in %s", path, final_commit_name); if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) && - textconv_object(path, o->blob_sha1, (char **) &sb.final_buf, + textconv_object(path, o->mode, o->blob_sha1, 1, (char **) &sb.final_buf, &sb.final_buf_size)) ; else @@ -2474,40 +2799,68 @@ parse_done: num_read_blob++; lno = prepare_lines(&sb); - bottom = top = 0; - if (bottomtop) - prepare_blame_range(&sb, bottomtop, lno, &bottom, &top); - if (bottom && top && top < bottom) { - long tmp; - tmp = top; top = bottom; bottom = tmp; - } - if (bottom < 1) - bottom = 1; - if (top < 1) - top = lno; - bottom--; - if (lno < top || lno < bottom) - die("file %s has only %lu lines", path, lno); - - ent = xcalloc(1, sizeof(*ent)); - ent->lno = bottom; - ent->num_lines = top - bottom; - ent->suspect = o; - ent->s_lno = bottom; - - sb.ent = ent; + if (lno && !range_list.nr) + string_list_append(&range_list, xstrdup("1")); + + anchor = 1; + range_set_init(&ranges, range_list.nr); + for (range_i = 0; range_i < range_list.nr; ++range_i) { + long bottom, top; + if (parse_range_arg(range_list.items[range_i].string, + nth_line_cb, &sb, lno, anchor, + &bottom, &top, sb.path)) + usage(blame_usage); + if (lno < top || ((lno || bottom) && lno < bottom)) + die("file %s has only %lu lines", path, lno); + if (bottom < 1) + bottom = 1; + if (top < 1) + top = lno; + bottom--; + range_set_append_unsafe(&ranges, bottom, top); + anchor = top + 1; + } + sort_and_merge_range_set(&ranges); + + for (range_i = ranges.nr; range_i > 0; --range_i) { + const struct range *r = &ranges.ranges[range_i - 1]; + long bottom = r->start; + long top = r->end; + struct blame_entry *next = ent; + ent = xcalloc(1, sizeof(*ent)); + ent->lno = bottom; + ent->num_lines = top - bottom; + ent->suspect = o; + ent->s_lno = bottom; + ent->next = next; + origin_incref(o); + } + + o->suspects = ent; + prio_queue_put(&sb.commits, o->commit); + + origin_decref(o); + + range_set_release(&ranges); + string_list_clear(&range_list, 0); + + sb.ent = NULL; sb.path = path; read_mailmap(&mailmap, NULL); + assign_blame(&sb, opt); + if (!incremental) setup_pager(); - assign_blame(&sb, opt); + free(final_commit_name); if (incremental) return 0; + sb.ent = blame_sort(sb.ent, compare_blame_final); + coalesce(&sb); if (!(output_option & OUTPUT_PORCELAIN)) |