From 659c69cfef984e7416decc78841877207e9d5914 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 9 Nov 2007 01:49:36 +0100 Subject: Add strchrnul() As suggested by Pierre Habouzit, add strchrnul(). It's a useful GNU extension and can simplify string parser code. There are several places in git that can be converted to strchrnul(); as a trivial example, this patch introduces its usage to builtin-fetch--tool.c. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Makefile | 13 +++++++++++++ builtin-fetch--tool.c | 8 ++------ compat/strchrnul.c | 8 ++++++++ git-compat-util.h | 5 +++++ 4 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 compat/strchrnul.c diff --git a/Makefile b/Makefile index 621270f623..69dcbb227e 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,8 @@ all:: # # Define NO_MEMMEM if you don't have memmem. # +# Define NO_STRCHRNUL if you don't have strchrnul. +# # Define NO_STRLCPY if you don't have strlcpy. # # Define NO_STRTOUMAX if you don't have strtoumax in the C library. @@ -406,6 +408,7 @@ ifeq ($(uname_S),Darwin) OLD_ICONV = UnfortunatelyYes NO_STRLCPY = YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease endif ifeq ($(uname_S),SunOS) NEEDS_SOCKET = YesPlease @@ -413,6 +416,7 @@ ifeq ($(uname_S),SunOS) SHELL_PATH = /bin/bash NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease NO_HSTRERROR = YesPlease ifeq ($(uname_R),5.8) NEEDS_LIBICONV = YesPlease @@ -438,6 +442,7 @@ ifeq ($(uname_O),Cygwin) NO_D_INO_IN_DIRENT = YesPlease NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease NO_SYMLINK_HEAD = YesPlease NEEDS_LIBICONV = YesPlease NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes @@ -452,12 +457,14 @@ endif ifeq ($(uname_S),FreeBSD) NEEDS_LIBICONV = YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease BASIC_CFLAGS += -I/usr/local/include BASIC_LDFLAGS += -L/usr/local/lib endif ifeq ($(uname_S),OpenBSD) NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease NEEDS_LIBICONV = YesPlease BASIC_CFLAGS += -I/usr/local/include BASIC_LDFLAGS += -L/usr/local/lib @@ -473,6 +480,7 @@ endif ifeq ($(uname_S),AIX) NO_STRCASESTR=YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease NO_STRLCPY = YesPlease NEEDS_LIBICONV=YesPlease endif @@ -485,6 +493,7 @@ ifeq ($(uname_S),IRIX64) NO_SETENV=YesPlease NO_STRCASESTR=YesPlease NO_MEMMEM = YesPlease + NO_STRCHRNUL = YesPlease NO_STRLCPY = YesPlease NO_SOCKADDR_STORAGE=YesPlease SHELL_PATH=/usr/gnu/bin/bash @@ -695,6 +704,10 @@ ifdef NO_MEMMEM COMPAT_CFLAGS += -DNO_MEMMEM COMPAT_OBJS += compat/memmem.o endif +ifdef NO_STRCHRNUL + COMPAT_CFLAGS += -DNO_STRCHRNUL + COMPAT_OBJS += compat/strchrnul.o +endif ifdef THREADED_DELTA_SEARCH BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c index 6a78517958..ed60847d9f 100644 --- a/builtin-fetch--tool.c +++ b/builtin-fetch--tool.c @@ -435,9 +435,7 @@ static int pick_rref(int sha1_only, const char *rref, const char *ls_remote_resu cp++; if (!*cp) break; - np = strchr(cp, '\n'); - if (!np) - np = cp + strlen(cp); + np = strchrnul(cp, '\n'); if (pass) { lrr_list[i].line = cp; lrr_list[i].name = cp + 41; @@ -461,9 +459,7 @@ static int pick_rref(int sha1_only, const char *rref, const char *ls_remote_resu rref++; if (!*rref) break; - next = strchr(rref, '\n'); - if (!next) - next = rref + strlen(rref); + next = strchrnul(rref, '\n'); rreflen = next - rref; for (i = 0; i < lrr_count; i++) { diff --git a/compat/strchrnul.c b/compat/strchrnul.c new file mode 100644 index 0000000000..51839feb6e --- /dev/null +++ b/compat/strchrnul.c @@ -0,0 +1,8 @@ +#include "../git-compat-util.h" + +char *gitstrchrnul(const char *s, int c) +{ + while (*s && *s != c) + s++; + return (char *)s; +} diff --git a/git-compat-util.h b/git-compat-util.h index 7b29d1b905..e72654bc40 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -183,6 +183,11 @@ void *gitmemmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); #endif +#ifdef NO_STRCHRNUL +#define strchrnul gitstrchrnul +char *gitstrchrnul(const char *s, int c); +#endif + extern void release_pack_memory(size_t, int); static inline char* xstrdup(const char *str) -- cgit v1.2.3 From cde75e59e1b2d8dd3ba49bc9034692dd06ee3907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Fri, 9 Nov 2007 01:49:42 +0100 Subject: --pretty=format: on-demand format expansion Some of the --pretty=format placeholders expansions are expensive to calculate. This is made worse by the current code's use of interpolate(), which requires _all_ placeholders are to be prepared up front. One way to speed this up is to check which placeholders are present in the format string and to prepare only the expansions that are needed. That still leaves the allocation overhead of interpolate(). Another way is to use a callback based approach together with the strbuf library to keep allocations to a minimum and avoid string copies. That's what this patch does. It introduces a new strbuf function, strbuf_expand(). The function takes a format string, list of placeholder strings, a user supplied function 'fn', and an opaque pointer 'context' to tell 'fn' what thingy to operate on. The function 'fn' is expected to accept a strbuf, a parsed placeholder string and the 'context' pointer, and append the interpolated value for the 'context' thingy, according to the format specified by the placeholder. Thanks to Pierre Habouzit for his suggestion to use strchrnul() and the code surrounding its callsite. And thanks to Junio for most of this commit message. :) Here my measurements of most of Paul Mackerras' test cases that highlighted the performance problem (best of three runs): (master) $ time git log --pretty=oneline >/dev/null real 0m0.390s user 0m0.340s sys 0m0.040s (master) $ time git log --pretty=raw >/dev/null real 0m0.434s user 0m0.408s sys 0m0.016s (master) $ time git log --pretty="format:%H {%P} %ct" >/dev/null real 0m1.347s user 0m0.080s sys 0m1.256s (interp_find_active -- Dscho) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.694s user 0m0.020s sys 0m0.672s (strbuf_expand -- this patch) $ time ./git log --pretty="format:%H {%P} %ct" >/dev/null real 0m0.395s user 0m0.352s sys 0m0.028s Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- pretty.c | 276 +++++++++++++++++++++++++++++++++++---------------------------- strbuf.c | 24 ++++++ strbuf.h | 3 + 3 files changed, 180 insertions(+), 123 deletions(-) diff --git a/pretty.c b/pretty.c index 490cede263..9fbd73f748 100644 --- a/pretty.c +++ b/pretty.c @@ -1,6 +1,5 @@ #include "cache.h" #include "commit.h" -#include "interpolate.h" #include "utf8.h" #include "diff.h" #include "revision.h" @@ -283,7 +282,8 @@ static char *logmsg_reencode(const struct commit *commit, return out; } -static void fill_person(struct interp *table, const char *msg, int len) +static void format_person_part(struct strbuf *sb, char part, + const char *msg, int len) { int start, end, tz = 0; unsigned long date; @@ -295,7 +295,10 @@ static void fill_person(struct interp *table, const char *msg, int len) start = end + 1; while (end > 0 && isspace(msg[end - 1])) end--; - table[0].value = xmemdupz(msg, end); + if (part == 'n') { /* name */ + strbuf_add(sb, msg, end); + return; + } if (start >= len) return; @@ -307,7 +310,10 @@ static void fill_person(struct interp *table, const char *msg, int len) if (end >= len) return; - table[1].value = xmemdupz(msg + start, end - start); + if (part == 'e') { /* email */ + strbuf_add(sb, msg + start, end - start); + return; + } /* parse date */ for (start = end + 1; start < len && isspace(msg[start]); start++) @@ -318,7 +324,10 @@ static void fill_person(struct interp *table, const char *msg, int len) if (msg + start == ep) return; - table[5].value = xmemdupz(msg + start, ep - (msg + start)); + if (part == 't') { /* date, UNIX timestamp */ + strbuf_add(sb, msg + start, ep - (msg + start)); + return; + } /* parse tz */ for (start = ep - msg + 1; start < len && isspace(msg[start]); start++) @@ -329,123 +338,107 @@ static void fill_person(struct interp *table, const char *msg, int len) tz = -tz; } - interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL)); - interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822)); - interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE)); - interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601)); + switch (part) { + case 'd': /* date */ + strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL)); + return; + case 'D': /* date, RFC2822 style */ + strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822)); + return; + case 'r': /* date, relative */ + strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE)); + return; + case 'i': /* date, ISO 8601 */ + strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601)); + return; + } } -void format_commit_message(const struct commit *commit, - const void *format, struct strbuf *sb) +static void format_commit_item(struct strbuf *sb, const char *placeholder, + void *context) { - struct interp table[] = { - { "%H" }, /* commit hash */ - { "%h" }, /* abbreviated commit hash */ - { "%T" }, /* tree hash */ - { "%t" }, /* abbreviated tree hash */ - { "%P" }, /* parent hashes */ - { "%p" }, /* abbreviated parent hashes */ - { "%an" }, /* author name */ - { "%ae" }, /* author email */ - { "%ad" }, /* author date */ - { "%aD" }, /* author date, RFC2822 style */ - { "%ar" }, /* author date, relative */ - { "%at" }, /* author date, UNIX timestamp */ - { "%ai" }, /* author date, ISO 8601 */ - { "%cn" }, /* committer name */ - { "%ce" }, /* committer email */ - { "%cd" }, /* committer date */ - { "%cD" }, /* committer date, RFC2822 style */ - { "%cr" }, /* committer date, relative */ - { "%ct" }, /* committer date, UNIX timestamp */ - { "%ci" }, /* committer date, ISO 8601 */ - { "%e" }, /* encoding */ - { "%s" }, /* subject */ - { "%b" }, /* body */ - { "%Cred" }, /* red */ - { "%Cgreen" }, /* green */ - { "%Cblue" }, /* blue */ - { "%Creset" }, /* reset color */ - { "%n" }, /* newline */ - { "%m" }, /* left/right/bottom */ - }; - enum interp_index { - IHASH = 0, IHASH_ABBREV, - ITREE, ITREE_ABBREV, - IPARENTS, IPARENTS_ABBREV, - IAUTHOR_NAME, IAUTHOR_EMAIL, - IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE, - IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601, - ICOMMITTER_NAME, ICOMMITTER_EMAIL, - ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822, - ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP, - ICOMMITTER_ISO8601, - IENCODING, - ISUBJECT, - IBODY, - IRED, IGREEN, IBLUE, IRESET_COLOR, - INEWLINE, - ILEFT_RIGHT, - }; + const struct commit *commit = context; struct commit_list *p; - char parents[1024]; - unsigned long len; int i; enum { HEADER, SUBJECT, BODY } state; const char *msg = commit->buffer; - if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table)) - die("invalid interp table!"); - /* these are independent of the commit */ - interp_set_entry(table, IRED, "\033[31m"); - interp_set_entry(table, IGREEN, "\033[32m"); - interp_set_entry(table, IBLUE, "\033[34m"); - interp_set_entry(table, IRESET_COLOR, "\033[m"); - interp_set_entry(table, INEWLINE, "\n"); + switch (placeholder[0]) { + case 'C': + switch (placeholder[3]) { + case 'd': /* red */ + strbuf_addstr(sb, "\033[31m"); + return; + case 'e': /* green */ + strbuf_addstr(sb, "\033[32m"); + return; + case 'u': /* blue */ + strbuf_addstr(sb, "\033[34m"); + return; + case 's': /* reset color */ + strbuf_addstr(sb, "\033[m"); + return; + } + case 'n': /* newline */ + strbuf_addch(sb, '\n'); + return; + } /* these depend on the commit */ if (!commit->object.parsed) parse_object(commit->object.sha1); - interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1)); - interp_set_entry(table, IHASH_ABBREV, - find_unique_abbrev(commit->object.sha1, - DEFAULT_ABBREV)); - interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1)); - interp_set_entry(table, ITREE_ABBREV, - find_unique_abbrev(commit->tree->object.sha1, - DEFAULT_ABBREV)); - interp_set_entry(table, ILEFT_RIGHT, - (commit->object.flags & BOUNDARY) - ? "-" - : (commit->object.flags & SYMMETRIC_LEFT) - ? "<" - : ">"); - - parents[1] = 0; - for (i = 0, p = commit->parents; - p && i < sizeof(parents) - 1; - p = p->next) - i += snprintf(parents + i, sizeof(parents) - i - 1, " %s", - sha1_to_hex(p->item->object.sha1)); - interp_set_entry(table, IPARENTS, parents + 1); - - parents[1] = 0; - for (i = 0, p = commit->parents; - p && i < sizeof(parents) - 1; - p = p->next) - i += snprintf(parents + i, sizeof(parents) - i - 1, " %s", - find_unique_abbrev(p->item->object.sha1, - DEFAULT_ABBREV)); - interp_set_entry(table, IPARENTS_ABBREV, parents + 1); + switch (placeholder[0]) { + case 'H': /* commit hash */ + strbuf_addstr(sb, sha1_to_hex(commit->object.sha1)); + return; + case 'h': /* abbreviated commit hash */ + strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1, + DEFAULT_ABBREV)); + return; + case 'T': /* tree hash */ + strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1)); + return; + case 't': /* abbreviated tree hash */ + strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1, + DEFAULT_ABBREV)); + return; + case 'P': /* parent hashes */ + for (p = commit->parents; p; p = p->next) { + if (p != commit->parents) + strbuf_addch(sb, ' '); + strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1)); + } + return; + case 'p': /* abbreviated parent hashes */ + for (p = commit->parents; p; p = p->next) { + if (p != commit->parents) + strbuf_addch(sb, ' '); + strbuf_addstr(sb, find_unique_abbrev( + p->item->object.sha1, DEFAULT_ABBREV)); + } + return; + case 'm': /* left/right/bottom */ + strbuf_addch(sb, (commit->object.flags & BOUNDARY) + ? '-' + : (commit->object.flags & SYMMETRIC_LEFT) + ? '<' + : '>'); + return; + } + + /* For the rest we have to parse the commit header. */ for (i = 0, state = HEADER; msg[i] && state < BODY; i++) { int eol; for (eol = i; msg[eol] && msg[eol] != '\n'; eol++) ; /* do nothing */ if (state == SUBJECT) { - table[ISUBJECT].value = xmemdupz(msg + i, eol - i); + if (placeholder[0] == 's') { + strbuf_add(sb, msg + i, eol - i); + return; + } i = eol; } if (i == eol) { @@ -453,29 +446,66 @@ void format_commit_message(const struct commit *commit, /* strip empty lines */ while (msg[eol + 1] == '\n') eol++; - } else if (!prefixcmp(msg + i, "author ")) - fill_person(table + IAUTHOR_NAME, - msg + i + 7, eol - i - 7); - else if (!prefixcmp(msg + i, "committer ")) - fill_person(table + ICOMMITTER_NAME, - msg + i + 10, eol - i - 10); - else if (!prefixcmp(msg + i, "encoding ")) - table[IENCODING].value = - xmemdupz(msg + i + 9, eol - i - 9); + } else if (!prefixcmp(msg + i, "author ")) { + if (placeholder[0] == 'a') { + format_person_part(sb, placeholder[1], + msg + i + 7, eol - i - 7); + return; + } + } else if (!prefixcmp(msg + i, "committer ")) { + if (placeholder[0] == 'c') { + format_person_part(sb, placeholder[1], + msg + i + 10, eol - i - 10); + return; + } + } else if (!prefixcmp(msg + i, "encoding ")) { + if (placeholder[0] == 'e') { + strbuf_add(sb, msg + i + 9, eol - i - 9); + return; + } + } i = eol; } - if (msg[i]) - table[IBODY].value = xstrdup(msg + i); - - len = interpolate(sb->buf + sb->len, strbuf_avail(sb), - format, table, ARRAY_SIZE(table)); - if (len > strbuf_avail(sb)) { - strbuf_grow(sb, len); - interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1, - format, table, ARRAY_SIZE(table)); - } - strbuf_setlen(sb, sb->len + len); - interp_clear_table(table, ARRAY_SIZE(table)); + if (msg[i] && placeholder[0] == 'b') /* body */ + strbuf_addstr(sb, msg + i); +} + +void format_commit_message(const struct commit *commit, + const void *format, struct strbuf *sb) +{ + const char *placeholders[] = { + "H", /* commit hash */ + "h", /* abbreviated commit hash */ + "T", /* tree hash */ + "t", /* abbreviated tree hash */ + "P", /* parent hashes */ + "p", /* abbreviated parent hashes */ + "an", /* author name */ + "ae", /* author email */ + "ad", /* author date */ + "aD", /* author date, RFC2822 style */ + "ar", /* author date, relative */ + "at", /* author date, UNIX timestamp */ + "ai", /* author date, ISO 8601 */ + "cn", /* committer name */ + "ce", /* committer email */ + "cd", /* committer date */ + "cD", /* committer date, RFC2822 style */ + "cr", /* committer date, relative */ + "ct", /* committer date, UNIX timestamp */ + "ci", /* committer date, ISO 8601 */ + "e", /* encoding */ + "s", /* subject */ + "b", /* body */ + "Cred", /* red */ + "Cgreen", /* green */ + "Cblue", /* blue */ + "Creset", /* reset color */ + "n", /* newline */ + "m", /* left/right/bottom */ + NULL + }; + strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit); } static void pp_header(enum cmit_fmt fmt, diff --git a/strbuf.c b/strbuf.c index f4201e160d..536b43204e 100644 --- a/strbuf.c +++ b/strbuf.c @@ -129,6 +129,30 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...) strbuf_setlen(sb, sb->len + len); } +void strbuf_expand(struct strbuf *sb, const char *format, + const char **placeholders, expand_fn_t fn, void *context) +{ + for (;;) { + const char *percent, **p; + + percent = strchrnul(format, '%'); + strbuf_add(sb, format, percent - format); + if (!*percent) + break; + format = percent + 1; + + for (p = placeholders; *p; p++) { + if (!prefixcmp(format, *p)) + break; + } + if (*p) { + fn(sb, *p, context); + format += strlen(*p); + } else + strbuf_addch(sb, '%'); + } +} + size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) { size_t res; diff --git a/strbuf.h b/strbuf.h index cd7f295b65..21d8944154 100644 --- a/strbuf.h +++ b/strbuf.h @@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) { strbuf_add(sb, sb2->buf, sb2->len); } +typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); +extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context); + __attribute__((format(printf,2,3))) extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); -- cgit v1.2.3 From f29d59586c2a1666d18776cca5d96752dec0e8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 10 Nov 2007 12:14:20 +0100 Subject: --pretty=format: parse commit message only once As Jeff King pointed out, some placeholder expansions are related to each other: the steps to calculate one go most of the way towards calculating the other, too. This patch makes format_commit_message() parse the commit message only once, remembering the position of each item. This speeds up handling of format strings containing multiple placeholders from the set %s, %a*, %c*, %e, %b. Here are the timings for the git version in next. The first one is to estimate the overhead of the caching, the second one is taken from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an example of a format string found in the wild. The times are the fastest of three consecutive runs in each case: $ time git log --pretty=format:%e >/dev/null real 0m0.381s user 0m0.340s sys 0m0.024s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.623s user 0m0.556s sys 0m0.052s And here the times with this patch: $ time git log --pretty=format:%e >/dev/null real 0m0.385s user 0m0.332s sys 0m0.040s $ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null real 0m0.563s user 0m0.504s sys 0m0.048s Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- pretty.c | 124 +++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 82 insertions(+), 42 deletions(-) diff --git a/pretty.c b/pretty.c index 9fbd73f748..17a3010a6e 100644 --- a/pretty.c +++ b/pretty.c @@ -354,14 +354,67 @@ static void format_person_part(struct strbuf *sb, char part, } } -static void format_commit_item(struct strbuf *sb, const char *placeholder, - void *context) +struct chunk { + size_t off; + size_t len; +}; + +struct format_commit_context { + const struct commit *commit; + + /* These offsets are relative to the start of the commit message. */ + int commit_header_parsed; + struct chunk subject; + struct chunk author; + struct chunk committer; + struct chunk encoding; + size_t body_off; +}; + +static void parse_commit_header(struct format_commit_context *context) { - const struct commit *commit = context; - struct commit_list *p; + const char *msg = context->commit->buffer; int i; enum { HEADER, SUBJECT, BODY } state; + + for (i = 0, state = HEADER; msg[i] && state < BODY; i++) { + int eol; + for (eol = i; msg[eol] && msg[eol] != '\n'; eol++) + ; /* do nothing */ + + if (state == SUBJECT) { + context->subject.off = i; + context->subject.len = eol - i; + i = eol; + } + if (i == eol) { + state++; + /* strip empty lines */ + while (msg[eol + 1] == '\n') + eol++; + } else if (!prefixcmp(msg + i, "author ")) { + context->author.off = i + 7; + context->author.len = eol - i - 7; + } else if (!prefixcmp(msg + i, "committer ")) { + context->committer.off = i + 10; + context->committer.len = eol - i - 10; + } else if (!prefixcmp(msg + i, "encoding ")) { + context->encoding.off = i + 9; + context->encoding.len = eol - i - 9; + } + i = eol; + } + context->body_off = i; + context->commit_header_parsed = 1; +} + +static void format_commit_item(struct strbuf *sb, const char *placeholder, + void *context) +{ + struct format_commit_context *c = context; + const struct commit *commit = c->commit; const char *msg = commit->buffer; + struct commit_list *p; /* these are independent of the commit */ switch (placeholder[0]) { @@ -429,45 +482,28 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder, } /* For the rest we have to parse the commit header. */ - for (i = 0, state = HEADER; msg[i] && state < BODY; i++) { - int eol; - for (eol = i; msg[eol] && msg[eol] != '\n'; eol++) - ; /* do nothing */ + if (!c->commit_header_parsed) + parse_commit_header(c); - if (state == SUBJECT) { - if (placeholder[0] == 's') { - strbuf_add(sb, msg + i, eol - i); - return; - } - i = eol; - } - if (i == eol) { - state++; - /* strip empty lines */ - while (msg[eol + 1] == '\n') - eol++; - } else if (!prefixcmp(msg + i, "author ")) { - if (placeholder[0] == 'a') { - format_person_part(sb, placeholder[1], - msg + i + 7, eol - i - 7); - return; - } - } else if (!prefixcmp(msg + i, "committer ")) { - if (placeholder[0] == 'c') { - format_person_part(sb, placeholder[1], - msg + i + 10, eol - i - 10); - return; - } - } else if (!prefixcmp(msg + i, "encoding ")) { - if (placeholder[0] == 'e') { - strbuf_add(sb, msg + i + 9, eol - i - 9); - return; - } - } - i = eol; + switch (placeholder[0]) { + case 's': + strbuf_add(sb, msg + c->subject.off, c->subject.len); + return; + case 'a': + format_person_part(sb, placeholder[1], + msg + c->author.off, c->author.len); + return; + case 'c': + format_person_part(sb, placeholder[1], + msg + c->committer.off, c->committer.len); + return; + case 'e': + strbuf_add(sb, msg + c->encoding.off, c->encoding.len); + return; + case 'b': + strbuf_addstr(sb, msg + c->body_off); + return; } - if (msg[i] && placeholder[0] == 'b') /* body */ - strbuf_addstr(sb, msg + i); } void format_commit_message(const struct commit *commit, @@ -505,7 +541,11 @@ void format_commit_message(const struct commit *commit, "m", /* left/right/bottom */ NULL }; - strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit); + struct format_commit_context context; + + memset(&context, 0, sizeof(context)); + context.commit = commit; + strbuf_expand(sb, format, placeholders, format_commit_item, &context); } static void pp_header(enum cmit_fmt fmt, -- cgit v1.2.3 From 91db267ec849279053cf3ac3066c2f2c11db4321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 10 Nov 2007 12:16:05 +0100 Subject: add strbuf_adddup() Add a new function, strbuf_adddup(), that appends a duplicate of a part of a struct strbuf to end of the latter. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- strbuf.c | 7 +++++++ strbuf.h | 1 + 2 files changed, 8 insertions(+) diff --git a/strbuf.c b/strbuf.c index 536b43204e..dbd8c4bcfb 100644 --- a/strbuf.c +++ b/strbuf.c @@ -106,6 +106,13 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len) strbuf_setlen(sb, sb->len + len); } +void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) +{ + strbuf_grow(sb, len); + memcpy(sb->buf + sb->len, sb->buf + pos, len); + strbuf_setlen(sb, sb->len + len); +} + void strbuf_addf(struct strbuf *sb, const char *fmt, ...) { int len; diff --git a/strbuf.h b/strbuf.h index 21d8944154..13919123dc 100644 --- a/strbuf.h +++ b/strbuf.h @@ -101,6 +101,7 @@ static inline void strbuf_addstr(struct strbuf *sb, const char *s) { static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) { strbuf_add(sb, sb2->buf, sb2->len); } +extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context); -- cgit v1.2.3 From b9c62321380374d09cc99570cdd1390674532832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 10 Nov 2007 12:18:26 +0100 Subject: --format=pretty: avoid calculating expensive expansions twice As Jeff King remarked, format strings with duplicate placeholders can be slow to expand, because each instance is calculated anew. This patch makes use of the fact that format_commit_message() and its helper functions only ever add stuff to the end of the strbuf. For certain expensive placeholders, store the offset and length of their expansion with the strbuf at the first occurrence. Later they expansion result can simply be copied from there -- no malloc() or strdup() required. These certain placeholders are the abbreviated commit, tree and parent hashes, as the search for a unique abbreviated hash is quite costly. Here are the times for next (best of three runs): $ time git log --pretty=format:%h >/dev/null real 0m0.611s user 0m0.404s sys 0m0.204s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m1.206s user 0m0.744s sys 0m0.452s And here those with this patch (and the previous two); the speedup of the single placeholder case is just noise: $ time git log --pretty=format:%h >/dev/null real 0m0.608s user 0m0.416s sys 0m0.192s $ time git log --pretty=format:%h%h%h%h >/dev/null real 0m0.639s user 0m0.488s sys 0m0.140s Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- pretty.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/pretty.c b/pretty.c index 17a3010a6e..9db75b4e4f 100644 --- a/pretty.c +++ b/pretty.c @@ -369,8 +369,30 @@ struct format_commit_context { struct chunk committer; struct chunk encoding; size_t body_off; + + /* The following ones are relative to the result struct strbuf. */ + struct chunk abbrev_commit_hash; + struct chunk abbrev_tree_hash; + struct chunk abbrev_parent_hashes; }; +static int add_again(struct strbuf *sb, struct chunk *chunk) +{ + if (chunk->len) { + strbuf_adddup(sb, chunk->off, chunk->len); + return 1; + } + + /* + * We haven't seen this chunk before. Our caller is surely + * going to add it the hard way now. Remember the most likely + * start of the to-be-added chunk: the current end of the + * struct strbuf. + */ + chunk->off = sb->len; + return 0; +} + static void parse_commit_header(struct format_commit_context *context) { const char *msg = context->commit->buffer; @@ -447,15 +469,21 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder, strbuf_addstr(sb, sha1_to_hex(commit->object.sha1)); return; case 'h': /* abbreviated commit hash */ + if (add_again(sb, &c->abbrev_commit_hash)) + return; strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV)); + c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off; return; case 'T': /* tree hash */ strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1)); return; case 't': /* abbreviated tree hash */ + if (add_again(sb, &c->abbrev_tree_hash)) + return; strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1, DEFAULT_ABBREV)); + c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off; return; case 'P': /* parent hashes */ for (p = commit->parents; p; p = p->next) { @@ -465,12 +493,16 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder, } return; case 'p': /* abbreviated parent hashes */ + if (add_again(sb, &c->abbrev_parent_hashes)) + return; for (p = commit->parents; p; p = p->next) { if (p != commit->parents) strbuf_addch(sb, ' '); strbuf_addstr(sb, find_unique_abbrev( p->item->object.sha1, DEFAULT_ABBREV)); } + c->abbrev_parent_hashes.len = sb->len - + c->abbrev_parent_hashes.off; return; case 'm': /* left/right/bottom */ strbuf_addch(sb, (commit->object.flags & BOUNDARY) -- cgit v1.2.3 From 9e79f00f06a5500b30941e6925adda070504e6cf Mon Sep 17 00:00:00 2001 From: Andreas Ericsson Date: Sat, 10 Nov 2007 12:55:48 +0100 Subject: Simplify strchrnul() compat code strchrnul() was introduced in glibc in April 1999 and included in glibc-2.1. Checking for that version means the majority of all git users would get to use the optimized version in glibc. Of the remaining few some might get to use a slightly slower version than necessary but probably not slower than what we have today. Unfortunately, __GLIBC_PREREQ() macro was not available in glibc 2.1.1 which was short lived but already supported strchrnul(). Odd minority users of that library needs to live with our compatibility inline version. Rediffed-against-next-by: Rene Scharfe Signed-off-by: Junio C Hamano --- Makefile | 13 ------------- compat/strchrnul.c | 8 -------- git-compat-util.h | 9 +++++++-- 3 files changed, 7 insertions(+), 23 deletions(-) delete mode 100644 compat/strchrnul.c diff --git a/Makefile b/Makefile index 69dcbb227e..621270f623 100644 --- a/Makefile +++ b/Makefile @@ -30,8 +30,6 @@ all:: # # Define NO_MEMMEM if you don't have memmem. # -# Define NO_STRCHRNUL if you don't have strchrnul. -# # Define NO_STRLCPY if you don't have strlcpy. # # Define NO_STRTOUMAX if you don't have strtoumax in the C library. @@ -408,7 +406,6 @@ ifeq ($(uname_S),Darwin) OLD_ICONV = UnfortunatelyYes NO_STRLCPY = YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease endif ifeq ($(uname_S),SunOS) NEEDS_SOCKET = YesPlease @@ -416,7 +413,6 @@ ifeq ($(uname_S),SunOS) SHELL_PATH = /bin/bash NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease NO_HSTRERROR = YesPlease ifeq ($(uname_R),5.8) NEEDS_LIBICONV = YesPlease @@ -442,7 +438,6 @@ ifeq ($(uname_O),Cygwin) NO_D_INO_IN_DIRENT = YesPlease NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease NO_SYMLINK_HEAD = YesPlease NEEDS_LIBICONV = YesPlease NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes @@ -457,14 +452,12 @@ endif ifeq ($(uname_S),FreeBSD) NEEDS_LIBICONV = YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease BASIC_CFLAGS += -I/usr/local/include BASIC_LDFLAGS += -L/usr/local/lib endif ifeq ($(uname_S),OpenBSD) NO_STRCASESTR = YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease NEEDS_LIBICONV = YesPlease BASIC_CFLAGS += -I/usr/local/include BASIC_LDFLAGS += -L/usr/local/lib @@ -480,7 +473,6 @@ endif ifeq ($(uname_S),AIX) NO_STRCASESTR=YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease NO_STRLCPY = YesPlease NEEDS_LIBICONV=YesPlease endif @@ -493,7 +485,6 @@ ifeq ($(uname_S),IRIX64) NO_SETENV=YesPlease NO_STRCASESTR=YesPlease NO_MEMMEM = YesPlease - NO_STRCHRNUL = YesPlease NO_STRLCPY = YesPlease NO_SOCKADDR_STORAGE=YesPlease SHELL_PATH=/usr/gnu/bin/bash @@ -704,10 +695,6 @@ ifdef NO_MEMMEM COMPAT_CFLAGS += -DNO_MEMMEM COMPAT_OBJS += compat/memmem.o endif -ifdef NO_STRCHRNUL - COMPAT_CFLAGS += -DNO_STRCHRNUL - COMPAT_OBJS += compat/strchrnul.o -endif ifdef THREADED_DELTA_SEARCH BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH diff --git a/compat/strchrnul.c b/compat/strchrnul.c deleted file mode 100644 index 51839feb6e..0000000000 --- a/compat/strchrnul.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "../git-compat-util.h" - -char *gitstrchrnul(const char *s, int c) -{ - while (*s && *s != c) - s++; - return (char *)s; -} diff --git a/git-compat-util.h b/git-compat-util.h index e72654bc40..92d79673f8 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -183,9 +183,14 @@ void *gitmemmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); #endif -#ifdef NO_STRCHRNUL +#if !defined(__GLIBC_PREREQ) && !__GLIBC_PREREQ(2, 1) #define strchrnul gitstrchrnul -char *gitstrchrnul(const char *s, int c); +static inline char *gitstrchrnul(const char *s, int c) +{ + while (*s && *s != c) + s++; + return (char *)s; +} #endif extern void release_pack_memory(size_t, int); -- cgit v1.2.3 From 726c8ef5a5a129d8157d0043f60fe7195d2cdb77 Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Mon, 12 Nov 2007 11:09:05 +0100 Subject: Fix preprocessor logic that determines the availablity of strchrnul(). Apart from the error in the condition (&& should actually be ||), the construct #if !defined(A) || !A leads to a syntax error in the C preprocessor if A is indeed not defined. Tested-by: David Symonds Signed-off-by: Johannes Sixt Signed-off-by: Junio C Hamano --- git-compat-util.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/git-compat-util.h b/git-compat-util.h index 92d79673f8..ede9408bbd 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -183,7 +183,13 @@ void *gitmemmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); #endif -#if !defined(__GLIBC_PREREQ) && !__GLIBC_PREREQ(2, 1) +#ifdef __GLIBC_PREREQ +#if __GLIBC_PREREQ(2, 1) +#define HAVE_STRCHRNUL +#endif +#endif + +#ifndef HAVE_STRCHRNUL #define strchrnul gitstrchrnul static inline char *gitstrchrnul(const char *s, int c) { -- cgit v1.2.3