diff options
author | Junio C Hamano <gitster@pobox.com> | 2020-10-04 12:49:14 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2020-10-04 12:49:14 -0700 |
commit | 2fa8aacc723d4c76b84d449fa7cf0de6cf40e5ad (patch) | |
tree | e1a865fa66fadcd8cc2380ddb206b86f572aabf3 | |
parent | Merge branch 'jc/fmt-merge-msg-suppress-destination' (diff) | |
parent | shortlog: allow multiple groups to be specified (diff) | |
download | tgif-2fa8aacc723d4c76b84d449fa7cf0de6cf40e5ad.tar.xz |
Merge branch 'jk/shortlog-group-by-trailer'
"git shortlog" has been taught to group commits by the contents of
the trailer lines, like "Reviewed-by:", "Coauthored-by:", etc.
* jk/shortlog-group-by-trailer:
shortlog: allow multiple groups to be specified
shortlog: parse trailer idents
shortlog: rename parse_stdin_ident()
shortlog: de-duplicate trailer values
shortlog: match commit trailers with --group
trailer: add interface for iterating over commit trailers
shortlog: add grouping option
shortlog: change "author" variables to "ident"
-rw-r--r-- | Documentation/git-shortlog.txt | 31 | ||||
-rw-r--r-- | builtin/log.c | 1 | ||||
-rw-r--r-- | builtin/shortlog.c | 213 | ||||
-rw-r--r-- | shortlog.h | 8 | ||||
-rwxr-xr-x | t/t4201-shortlog.sh | 141 | ||||
-rw-r--r-- | trailer.c | 36 | ||||
-rw-r--r-- | trailer.h | 45 |
7 files changed, 444 insertions, 31 deletions
diff --git a/Documentation/git-shortlog.txt b/Documentation/git-shortlog.txt index a72ea7f7ba..fd93cd41e9 100644 --- a/Documentation/git-shortlog.txt +++ b/Documentation/git-shortlog.txt @@ -47,9 +47,38 @@ OPTIONS Each pretty-printed commit will be rewrapped before it is shown. +--group=<type>:: + Group commits based on `<type>`. If no `--group` option is + specified, the default is `author`. `<type>` is one of: ++ +-- + - `author`, commits are grouped by author + - `committer`, commits are grouped by committer (the same as `-c`) + - `trailer:<field>`, the `<field>` is interpreted as a case-insensitive + commit message trailer (see linkgit:git-interpret-trailers[1]). For + example, if your project uses `Reviewed-by` trailers, you might want + to see who has been reviewing with + `git shortlog -ns --group=trailer:reviewed-by`. ++ +Note that commits that do not include the trailer will not be counted. +Likewise, commits with multiple trailers (e.g., multiple signoffs) may +be counted more than once (but only once per unique trailer value in +that commit). ++ +Shortlog will attempt to parse each trailer value as a `name <email>` +identity. If successful, the mailmap is applied and the email is omitted +unless the `--email` option is specified. If the value cannot be parsed +as an identity, it will be taken literally and completely. +-- ++ +If `--group` is specified multiple times, commits are counted under each +value (but again, only once per unique value in that commit). For +example, `git shortlog --group=author --group=trailer:co-authored-by` +counts both authors and co-authors. + -c:: --committer:: - Collect and show committer identities instead of authors. + This is an alias for `--group=committer`. -w[<width>[,<indent1>[,<indent2>]]]:: Linewrap the output by wrapping each line at `width`. The first diff --git a/builtin/log.c b/builtin/log.c index b8824d898f..7f27e9eca1 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1195,6 +1195,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, log.in1 = 2; log.in2 = 4; log.file = rev->diffopt.file; + log.groups = SHORTLOG_GROUP_AUTHOR; for (i = 0; i < nr; i++) shortlog_add_commit(&log, list[i]); diff --git a/builtin/shortlog.c b/builtin/shortlog.c index c856c58bb5..0a5c4968f6 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -9,6 +9,7 @@ #include "mailmap.h" #include "shortlog.h" #include "parse-options.h" +#include "trailer.h" static char const * const shortlog_usage[] = { N_("git shortlog [<options>] [<revision-range>] [[--] <path>...]"), @@ -49,12 +50,12 @@ static int compare_by_list(const void *a1, const void *a2) } static void insert_one_record(struct shortlog *log, - const char *author, + const char *ident, const char *oneline) { struct string_list_item *item; - item = string_list_insert(&log->list, author); + item = string_list_insert(&log->list, ident); if (log->summary) item->util = (void *)(UTIL_TO_INT(item) + 1); @@ -97,8 +98,8 @@ static void insert_one_record(struct shortlog *log, } } -static int parse_stdin_author(struct shortlog *log, - struct strbuf *out, const char *in) +static int parse_ident(struct shortlog *log, + struct strbuf *out, const char *in) { const char *mailbuf, *namebuf; size_t namelen, maillen; @@ -122,18 +123,33 @@ static int parse_stdin_author(struct shortlog *log, static void read_from_stdin(struct shortlog *log) { - struct strbuf author = STRBUF_INIT; - struct strbuf mapped_author = STRBUF_INIT; + struct strbuf ident = STRBUF_INIT; + struct strbuf mapped_ident = STRBUF_INIT; struct strbuf oneline = STRBUF_INIT; static const char *author_match[2] = { "Author: ", "author " }; static const char *committer_match[2] = { "Commit: ", "committer " }; const char **match; - match = log->committer ? committer_match : author_match; - while (strbuf_getline_lf(&author, stdin) != EOF) { + if (HAS_MULTI_BITS(log->groups)) + die(_("using multiple --group options with stdin is not supported")); + + switch (log->groups) { + case SHORTLOG_GROUP_AUTHOR: + match = author_match; + break; + case SHORTLOG_GROUP_COMMITTER: + match = committer_match; + break; + case SHORTLOG_GROUP_TRAILER: + die(_("using --group=trailer with stdin is not supported")); + default: + BUG("unhandled shortlog group"); + } + + while (strbuf_getline_lf(&ident, stdin) != EOF) { const char *v; - if (!skip_prefix(author.buf, match[0], &v) && - !skip_prefix(author.buf, match[1], &v)) + if (!skip_prefix(ident.buf, match[0], &v) && + !skip_prefix(ident.buf, match[1], &v)) continue; while (strbuf_getline_lf(&oneline, stdin) != EOF && oneline.len) @@ -142,23 +158,118 @@ static void read_from_stdin(struct shortlog *log) !oneline.len) ; /* discard blanks */ - strbuf_reset(&mapped_author); - if (parse_stdin_author(log, &mapped_author, v) < 0) + strbuf_reset(&mapped_ident); + if (parse_ident(log, &mapped_ident, v) < 0) continue; - insert_one_record(log, mapped_author.buf, oneline.buf); + insert_one_record(log, mapped_ident.buf, oneline.buf); } - strbuf_release(&author); - strbuf_release(&mapped_author); + strbuf_release(&ident); + strbuf_release(&mapped_ident); strbuf_release(&oneline); } +struct strset_item { + struct hashmap_entry ent; + char value[FLEX_ARRAY]; +}; + +struct strset { + struct hashmap map; +}; + +#define STRSET_INIT { { NULL } } + +static int strset_item_hashcmp(const void *hash_data, + const struct hashmap_entry *entry, + const struct hashmap_entry *entry_or_key, + const void *keydata) +{ + const struct strset_item *a, *b; + + a = container_of(entry, const struct strset_item, ent); + if (keydata) + return strcmp(a->value, keydata); + + b = container_of(entry_or_key, const struct strset_item, ent); + return strcmp(a->value, b->value); +} + +/* + * Adds "str" to the set if it was not already present; returns true if it was + * already there. + */ +static int strset_check_and_add(struct strset *ss, const char *str) +{ + unsigned int hash = strhash(str); + struct strset_item *item; + + if (!ss->map.table) + hashmap_init(&ss->map, strset_item_hashcmp, NULL, 0); + + if (hashmap_get_from_hash(&ss->map, hash, str)) + return 1; + + FLEX_ALLOC_STR(item, value, str); + hashmap_entry_init(&item->ent, hash); + hashmap_add(&ss->map, &item->ent); + return 0; +} + +static void strset_clear(struct strset *ss) +{ + if (!ss->map.table) + return; + hashmap_free_entries(&ss->map, struct strset_item, ent); +} + +static void insert_records_from_trailers(struct shortlog *log, + struct strset *dups, + struct commit *commit, + struct pretty_print_context *ctx, + const char *oneline) +{ + struct trailer_iterator iter; + const char *commit_buffer, *body; + struct strbuf ident = STRBUF_INIT; + + /* + * Using format_commit_message("%B") would be simpler here, but + * this saves us copying the message. + */ + commit_buffer = logmsg_reencode(commit, NULL, ctx->output_encoding); + body = strstr(commit_buffer, "\n\n"); + if (!body) + return; + + trailer_iterator_init(&iter, body); + while (trailer_iterator_advance(&iter)) { + const char *value = iter.val.buf; + + if (!string_list_has_string(&log->trailers, iter.key.buf)) + continue; + + strbuf_reset(&ident); + if (!parse_ident(log, &ident, value)) + value = ident.buf; + + if (strset_check_and_add(dups, value)) + continue; + insert_one_record(log, value, oneline); + } + trailer_iterator_release(&iter); + + strbuf_release(&ident); + unuse_commit_buffer(commit, commit_buffer); +} + void shortlog_add_commit(struct shortlog *log, struct commit *commit) { - struct strbuf author = STRBUF_INIT; + struct strbuf ident = STRBUF_INIT; struct strbuf oneline = STRBUF_INIT; + struct strset dups = STRSET_INIT; struct pretty_print_context ctx = {0}; - const char *fmt; + const char *oneline_str; ctx.fmt = CMIT_FMT_USERFORMAT; ctx.abbrev = log->abbrev; @@ -166,21 +277,38 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) ctx.date_mode.type = DATE_NORMAL; ctx.output_encoding = get_log_output_encoding(); - fmt = log->committer ? - (log->email ? "%cN <%cE>" : "%cN") : - (log->email ? "%aN <%aE>" : "%aN"); - - format_commit_message(commit, fmt, &author, &ctx); if (!log->summary) { if (log->user_format) pretty_print_commit(&ctx, commit, &oneline); else format_commit_message(commit, "%s", &oneline, &ctx); } + oneline_str = oneline.len ? oneline.buf : "<none>"; + + if (log->groups & SHORTLOG_GROUP_AUTHOR) { + strbuf_reset(&ident); + format_commit_message(commit, + log->email ? "%aN <%aE>" : "%aN", + &ident, &ctx); + if (!HAS_MULTI_BITS(log->groups) || + !strset_check_and_add(&dups, ident.buf)) + insert_one_record(log, ident.buf, oneline_str); + } + if (log->groups & SHORTLOG_GROUP_COMMITTER) { + strbuf_reset(&ident); + format_commit_message(commit, + log->email ? "%cN <%cE>" : "%cN", + &ident, &ctx); + if (!HAS_MULTI_BITS(log->groups) || + !strset_check_and_add(&dups, ident.buf)) + insert_one_record(log, ident.buf, oneline_str); + } + if (log->groups & SHORTLOG_GROUP_TRAILER) { + insert_records_from_trailers(log, &dups, commit, &ctx, oneline_str); + } - insert_one_record(log, author.buf, oneline.len ? oneline.buf : "<none>"); - - strbuf_release(&author); + strset_clear(&dups); + strbuf_release(&ident); strbuf_release(&oneline); } @@ -241,6 +369,28 @@ static int parse_wrap_args(const struct option *opt, const char *arg, int unset) return 0; } +static int parse_group_option(const struct option *opt, const char *arg, int unset) +{ + struct shortlog *log = opt->value; + const char *field; + + if (unset) { + log->groups = 0; + string_list_clear(&log->trailers, 0); + } else if (!strcasecmp(arg, "author")) + log->groups |= SHORTLOG_GROUP_AUTHOR; + else if (!strcasecmp(arg, "committer")) + log->groups |= SHORTLOG_GROUP_COMMITTER; + else if (skip_prefix(arg, "trailer:", &field)) { + log->groups |= SHORTLOG_GROUP_TRAILER; + string_list_append(&log->trailers, field); + } else + return error(_("unknown group type: %s"), arg); + + return 0; +} + + void shortlog_init(struct shortlog *log) { memset(log, 0, sizeof(*log)); @@ -251,6 +401,8 @@ void shortlog_init(struct shortlog *log) log->wrap = DEFAULT_WRAPLEN; log->in1 = DEFAULT_INDENT1; log->in2 = DEFAULT_INDENT2; + log->trailers.strdup_strings = 1; + log->trailers.cmp = strcasecmp; } int cmd_shortlog(int argc, const char **argv, const char *prefix) @@ -260,8 +412,9 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) int nongit = !startup_info->have_repository; const struct option options[] = { - OPT_BOOL('c', "committer", &log.committer, - N_("Group by committer rather than author")), + OPT_BIT('c', "committer", &log.groups, + N_("Group by committer rather than author"), + SHORTLOG_GROUP_COMMITTER), OPT_BOOL('n', "numbered", &log.sort_by_number, N_("sort output according to the number of commits per author")), OPT_BOOL('s', "summary", &log.summary, @@ -271,6 +424,8 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix) OPT_CALLBACK_F('w', NULL, &log, N_("<w>[,<i1>[,<i2>]]"), N_("Linewrap output"), PARSE_OPT_OPTARG, &parse_wrap_args), + OPT_CALLBACK(0, "group", &log, N_("field"), + N_("Group by field"), parse_group_option), OPT_END(), }; @@ -311,6 +466,10 @@ parse_done: log.abbrev = rev.abbrev; log.file = rev.diffopt.file; + if (!log.groups) + log.groups = SHORTLOG_GROUP_AUTHOR; + string_list_sort(&log.trailers); + /* assume HEAD if from a tty */ if (!nongit && !rev.pending.nr && isatty(0)) add_head_to_pending(&rev); diff --git a/shortlog.h b/shortlog.h index 2fa61c4294..64be879b24 100644 --- a/shortlog.h +++ b/shortlog.h @@ -15,7 +15,13 @@ struct shortlog { int in2; int user_format; int abbrev; - int committer; + + enum { + SHORTLOG_GROUP_AUTHOR = (1 << 0), + SHORTLOG_GROUP_COMMITTER = (1 << 1), + SHORTLOG_GROUP_TRAILER = (1 << 2), + } groups; + struct string_list trailers; char *common_repo_prefix; int email; diff --git a/t/t4201-shortlog.sh b/t/t4201-shortlog.sh index d3a7ce6bbb..3d5c4a2086 100755 --- a/t/t4201-shortlog.sh +++ b/t/t4201-shortlog.sh @@ -215,4 +215,145 @@ test_expect_success 'shortlog --committer (external)' ' test_cmp expect actual ' +test_expect_success '--group=committer is the same as --committer' ' + git shortlog -ns --group=committer HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'shortlog --group=trailer:signed-off-by' ' + git commit --allow-empty -m foo -s && + GIT_COMMITTER_NAME="SOB One" \ + GIT_COMMITTER_EMAIL=sob@example.com \ + git commit --allow-empty -m foo -s && + git commit --allow-empty --amend --no-edit -s && + cat >expect <<-\EOF && + 2 C O Mitter <committer@example.com> + 1 SOB One <sob@example.com> + EOF + git shortlog -nse --group=trailer:signed-off-by HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'trailer idents are split' ' + cat >expect <<-\EOF && + 2 C O Mitter + 1 SOB One + EOF + git shortlog -ns --group=trailer:signed-off-by HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'trailer idents are mailmapped' ' + cat >expect <<-\EOF && + 2 C O Mitter + 1 Another Name + EOF + echo "Another Name <sob@example.com>" >mail.map && + git -c mailmap.file=mail.map shortlog -ns \ + --group=trailer:signed-off-by HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'shortlog de-duplicates trailers in a single commit' ' + git commit --allow-empty -F - <<-\EOF && + subject one + + this message has two distinct values, plus a repeat + + Repeated-trailer: Foo + Repeated-trailer: Bar + Repeated-trailer: Foo + EOF + + git commit --allow-empty -F - <<-\EOF && + subject two + + similar to the previous, but without the second distinct value + + Repeated-trailer: Foo + Repeated-trailer: Foo + EOF + + cat >expect <<-\EOF && + 2 Foo + 1 Bar + EOF + git shortlog -ns --group=trailer:repeated-trailer -2 HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'shortlog can match multiple groups' ' + git commit --allow-empty -F - <<-\EOF && + subject one + + this has two trailers that are distinct from the author; it will count + 3 times in the output + + Some-trailer: User A <a@example.com> + Another-trailer: User B <b@example.com> + EOF + + git commit --allow-empty -F - <<-\EOF && + subject two + + this one has two trailers, one of which is a duplicate with the author; + it will only be counted once for them + + Another-trailer: A U Thor <author@example.com> + Some-trailer: User B <b@example.com> + EOF + + cat >expect <<-\EOF && + 2 A U Thor + 2 User B + 1 User A + EOF + git shortlog -ns \ + --group=author \ + --group=trailer:some-trailer \ + --group=trailer:another-trailer \ + -2 HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'set up option selection tests' ' + git commit --allow-empty -F - <<-\EOF + subject + + body + + Trailer-one: value-one + Trailer-two: value-two + EOF +' + +test_expect_success '--no-group resets group list to author' ' + cat >expect <<-\EOF && + 1 A U Thor + EOF + git shortlog -ns \ + --group=committer \ + --group=trailer:trailer-one \ + --no-group \ + -1 HEAD >actual && + test_cmp expect actual +' + +test_expect_success '--no-group resets trailer list' ' + cat >expect <<-\EOF && + 1 value-two + EOF + git shortlog -ns \ + --group=trailer:trailer-one \ + --no-group \ + --group=trailer:trailer-two \ + -1 HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'stdin with multiple groups reports error' ' + git log >log && + test_must_fail git shortlog --group=author --group=committer <log +' + test_done @@ -1183,3 +1183,39 @@ void format_trailers_from_commit(struct strbuf *out, const char *msg, format_trailer_info(out, &info, opts); trailer_info_release(&info); } + +void trailer_iterator_init(struct trailer_iterator *iter, const char *msg) +{ + struct process_trailer_options opts = PROCESS_TRAILER_OPTIONS_INIT; + strbuf_init(&iter->key, 0); + strbuf_init(&iter->val, 0); + opts.no_divider = 1; + trailer_info_get(&iter->info, msg, &opts); + iter->cur = 0; +} + +int trailer_iterator_advance(struct trailer_iterator *iter) +{ + while (iter->cur < iter->info.trailer_nr) { + char *trailer = iter->info.trailers[iter->cur++]; + int separator_pos = find_separator(trailer, separators); + + if (separator_pos < 1) + continue; /* not a real trailer */ + + strbuf_reset(&iter->key); + strbuf_reset(&iter->val); + parse_trailer(&iter->key, &iter->val, NULL, + trailer, separator_pos); + unfold_value(&iter->val); + return 1; + } + return 0; +} + +void trailer_iterator_release(struct trailer_iterator *iter) +{ + trailer_info_release(&iter->info); + strbuf_release(&iter->val); + strbuf_release(&iter->key); +} @@ -2,8 +2,7 @@ #define TRAILER_H #include "list.h" - -struct strbuf; +#include "strbuf.h" enum trailer_where { WHERE_DEFAULT, @@ -103,4 +102,46 @@ void trailer_info_release(struct trailer_info *info); void format_trailers_from_commit(struct strbuf *out, const char *msg, const struct process_trailer_options *opts); +/* + * An interface for iterating over the trailers found in a particular commit + * message. Use like: + * + * struct trailer_iterator iter; + * trailer_iterator_init(&iter, msg); + * while (trailer_iterator_advance(&iter)) + * ... do something with iter.key and iter.val ... + * trailer_iterator_release(&iter); + */ +struct trailer_iterator { + struct strbuf key; + struct strbuf val; + + /* private */ + struct trailer_info info; + size_t cur; +}; + +/* + * Initialize "iter" in preparation for walking over the trailers in the commit + * message "msg". The "msg" pointer must remain valid until the iterator is + * released. + * + * After initializing, note that key/val will not yet point to any trailer. + * Call advance() to parse the first one (if any). + */ +void trailer_iterator_init(struct trailer_iterator *iter, const char *msg); + +/* + * Advance to the next trailer of the iterator. Returns 0 if there is no such + * trailer, and 1 otherwise. The key and value of the trailer can be + * fetched from the iter->key and iter->value fields (which are valid + * only until the next advance). + */ +int trailer_iterator_advance(struct trailer_iterator *iter); + +/* + * Release all resources associated with the trailer iteration. + */ +void trailer_iterator_release(struct trailer_iterator *iter); + #endif /* TRAILER_H */ |