From 659c69cfef984e7416decc78841877207e9d5914 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <rene.scharfe@lsrfire.ath.cx>
Date: Fri, 9 Nov 2007 01:49:36 +0100
Subject: Add strchrnul()

As suggested by Pierre Habouzit, add strchrnul().  It's a useful GNU
extension and can simplify string parser code.  There are several
places in git that can be converted to strchrnul(); as a trivial
example, this patch introduces its usage to builtin-fetch--tool.c.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile              | 13 +++++++++++++
 builtin-fetch--tool.c |  8 ++------
 compat/strchrnul.c    |  8 ++++++++
 git-compat-util.h     |  5 +++++
 4 files changed, 28 insertions(+), 6 deletions(-)
 create mode 100644 compat/strchrnul.c

diff --git a/Makefile b/Makefile
index 621270f623..69dcbb227e 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,8 @@ all::
 #
 # Define NO_MEMMEM if you don't have memmem.
 #
+# Define NO_STRCHRNUL if you don't have strchrnul.
+#
 # Define NO_STRLCPY if you don't have strlcpy.
 #
 # Define NO_STRTOUMAX if you don't have strtoumax in the C library.
@@ -406,6 +408,7 @@ ifeq ($(uname_S),Darwin)
 	OLD_ICONV = UnfortunatelyYes
 	NO_STRLCPY = YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 endif
 ifeq ($(uname_S),SunOS)
 	NEEDS_SOCKET = YesPlease
@@ -413,6 +416,7 @@ ifeq ($(uname_S),SunOS)
 	SHELL_PATH = /bin/bash
 	NO_STRCASESTR = YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 	NO_HSTRERROR = YesPlease
 	ifeq ($(uname_R),5.8)
 		NEEDS_LIBICONV = YesPlease
@@ -438,6 +442,7 @@ ifeq ($(uname_O),Cygwin)
 	NO_D_INO_IN_DIRENT = YesPlease
 	NO_STRCASESTR = YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 	NO_SYMLINK_HEAD = YesPlease
 	NEEDS_LIBICONV = YesPlease
 	NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
@@ -452,12 +457,14 @@ endif
 ifeq ($(uname_S),FreeBSD)
 	NEEDS_LIBICONV = YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 	BASIC_CFLAGS += -I/usr/local/include
 	BASIC_LDFLAGS += -L/usr/local/lib
 endif
 ifeq ($(uname_S),OpenBSD)
 	NO_STRCASESTR = YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 	NEEDS_LIBICONV = YesPlease
 	BASIC_CFLAGS += -I/usr/local/include
 	BASIC_LDFLAGS += -L/usr/local/lib
@@ -473,6 +480,7 @@ endif
 ifeq ($(uname_S),AIX)
 	NO_STRCASESTR=YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 	NO_STRLCPY = YesPlease
 	NEEDS_LIBICONV=YesPlease
 endif
@@ -485,6 +493,7 @@ ifeq ($(uname_S),IRIX64)
 	NO_SETENV=YesPlease
 	NO_STRCASESTR=YesPlease
 	NO_MEMMEM = YesPlease
+	NO_STRCHRNUL = YesPlease
 	NO_STRLCPY = YesPlease
 	NO_SOCKADDR_STORAGE=YesPlease
 	SHELL_PATH=/usr/gnu/bin/bash
@@ -695,6 +704,10 @@ ifdef NO_MEMMEM
 	COMPAT_CFLAGS += -DNO_MEMMEM
 	COMPAT_OBJS += compat/memmem.o
 endif
+ifdef NO_STRCHRNUL
+	COMPAT_CFLAGS += -DNO_STRCHRNUL
+	COMPAT_OBJS += compat/strchrnul.o
+endif
 
 ifdef THREADED_DELTA_SEARCH
 	BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH
diff --git a/builtin-fetch--tool.c b/builtin-fetch--tool.c
index 6a78517958..ed60847d9f 100644
--- a/builtin-fetch--tool.c
+++ b/builtin-fetch--tool.c
@@ -435,9 +435,7 @@ static int pick_rref(int sha1_only, const char *rref, const char *ls_remote_resu
 				cp++;
 			if (!*cp)
 				break;
-			np = strchr(cp, '\n');
-			if (!np)
-				np = cp + strlen(cp);
+			np = strchrnul(cp, '\n');
 			if (pass) {
 				lrr_list[i].line = cp;
 				lrr_list[i].name = cp + 41;
@@ -461,9 +459,7 @@ static int pick_rref(int sha1_only, const char *rref, const char *ls_remote_resu
 			rref++;
 		if (!*rref)
 			break;
-		next = strchr(rref, '\n');
-		if (!next)
-			next = rref + strlen(rref);
+		next = strchrnul(rref, '\n');
 		rreflen = next - rref;
 
 		for (i = 0; i < lrr_count; i++) {
diff --git a/compat/strchrnul.c b/compat/strchrnul.c
new file mode 100644
index 0000000000..51839feb6e
--- /dev/null
+++ b/compat/strchrnul.c
@@ -0,0 +1,8 @@
+#include "../git-compat-util.h"
+
+char *gitstrchrnul(const char *s, int c)
+{
+	while (*s && *s != c)
+		s++;
+	return (char *)s;
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 7b29d1b905..e72654bc40 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -183,6 +183,11 @@ void *gitmemmem(const void *haystack, size_t haystacklen,
                 const void *needle, size_t needlelen);
 #endif
 
+#ifdef NO_STRCHRNUL
+#define strchrnul gitstrchrnul
+char *gitstrchrnul(const char *s, int c);
+#endif
+
 extern void release_pack_memory(size_t, int);
 
 static inline char* xstrdup(const char *str)
-- 
cgit v1.2.3


From cde75e59e1b2d8dd3ba49bc9034692dd06ee3907 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <rene.scharfe@lsrfire.ath.cx>
Date: Fri, 9 Nov 2007 01:49:42 +0100
Subject: --pretty=format: on-demand format expansion

Some of the --pretty=format placeholders expansions are expensive to
calculate.  This is made worse by the current code's use of
interpolate(), which requires _all_ placeholders are to be prepared
up front.

One way to speed this up is to check which placeholders are present
in the format string and to prepare only the expansions that are
needed.  That still leaves the allocation overhead of interpolate().

Another way is to use a callback based approach together with the
strbuf library to keep allocations to a minimum and avoid string
copies.  That's what this patch does.  It introduces a new strbuf
function, strbuf_expand().

The function takes a format string, list of placeholder strings,
a user supplied function 'fn', and an opaque pointer 'context'
to tell 'fn' what thingy to operate on.

The function 'fn' is expected to accept a strbuf, a parsed
placeholder string and the 'context' pointer, and append the
interpolated value for the 'context' thingy, according to the
format specified by the placeholder.

Thanks to Pierre Habouzit for his suggestion to use strchrnul() and
the code surrounding its callsite.  And thanks to Junio for most of
this commit message. :)

Here my measurements of most of Paul Mackerras' test cases that
highlighted the performance problem (best of three runs):

(master)
$ time git log --pretty=oneline >/dev/null

real    0m0.390s
user    0m0.340s
sys     0m0.040s

(master)
$ time git log --pretty=raw >/dev/null

real    0m0.434s
user    0m0.408s
sys     0m0.016s

(master)
$ time git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m1.347s
user    0m0.080s
sys     0m1.256s

(interp_find_active -- Dscho)
$ time ./git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m0.694s
user    0m0.020s
sys     0m0.672s

(strbuf_expand -- this patch)
$ time ./git log --pretty="format:%H {%P} %ct" >/dev/null

real    0m0.395s
user    0m0.352s
sys     0m0.028s

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pretty.c | 276 +++++++++++++++++++++++++++++++++++----------------------------
 strbuf.c |  24 ++++++
 strbuf.h |   3 +
 3 files changed, 180 insertions(+), 123 deletions(-)

diff --git a/pretty.c b/pretty.c
index 490cede263..9fbd73f748 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1,6 +1,5 @@
 #include "cache.h"
 #include "commit.h"
-#include "interpolate.h"
 #include "utf8.h"
 #include "diff.h"
 #include "revision.h"
@@ -283,7 +282,8 @@ static char *logmsg_reencode(const struct commit *commit,
 	return out;
 }
 
-static void fill_person(struct interp *table, const char *msg, int len)
+static void format_person_part(struct strbuf *sb, char part,
+                               const char *msg, int len)
 {
 	int start, end, tz = 0;
 	unsigned long date;
@@ -295,7 +295,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	start = end + 1;
 	while (end > 0 && isspace(msg[end - 1]))
 		end--;
-	table[0].value = xmemdupz(msg, end);
+	if (part == 'n') {	/* name */
+		strbuf_add(sb, msg, end);
+		return;
+	}
 
 	if (start >= len)
 		return;
@@ -307,7 +310,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	if (end >= len)
 		return;
 
-	table[1].value = xmemdupz(msg + start, end - start);
+	if (part == 'e') {	/* email */
+		strbuf_add(sb, msg + start, end - start);
+		return;
+	}
 
 	/* parse date */
 	for (start = end + 1; start < len && isspace(msg[start]); start++)
@@ -318,7 +324,10 @@ static void fill_person(struct interp *table, const char *msg, int len)
 	if (msg + start == ep)
 		return;
 
-	table[5].value = xmemdupz(msg + start, ep - (msg + start));
+	if (part == 't') {	/* date, UNIX timestamp */
+		strbuf_add(sb, msg + start, ep - (msg + start));
+		return;
+	}
 
 	/* parse tz */
 	for (start = ep - msg + 1; start < len && isspace(msg[start]); start++)
@@ -329,123 +338,107 @@ static void fill_person(struct interp *table, const char *msg, int len)
 			tz = -tz;
 	}
 
-	interp_set_entry(table, 2, show_date(date, tz, DATE_NORMAL));
-	interp_set_entry(table, 3, show_date(date, tz, DATE_RFC2822));
-	interp_set_entry(table, 4, show_date(date, tz, DATE_RELATIVE));
-	interp_set_entry(table, 6, show_date(date, tz, DATE_ISO8601));
+	switch (part) {
+	case 'd':	/* date */
+		strbuf_addstr(sb, show_date(date, tz, DATE_NORMAL));
+		return;
+	case 'D':	/* date, RFC2822 style */
+		strbuf_addstr(sb, show_date(date, tz, DATE_RFC2822));
+		return;
+	case 'r':	/* date, relative */
+		strbuf_addstr(sb, show_date(date, tz, DATE_RELATIVE));
+		return;
+	case 'i':	/* date, ISO 8601 */
+		strbuf_addstr(sb, show_date(date, tz, DATE_ISO8601));
+		return;
+	}
 }
 
-void format_commit_message(const struct commit *commit,
-                           const void *format, struct strbuf *sb)
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
 {
-	struct interp table[] = {
-		{ "%H" },	/* commit hash */
-		{ "%h" },	/* abbreviated commit hash */
-		{ "%T" },	/* tree hash */
-		{ "%t" },	/* abbreviated tree hash */
-		{ "%P" },	/* parent hashes */
-		{ "%p" },	/* abbreviated parent hashes */
-		{ "%an" },	/* author name */
-		{ "%ae" },	/* author email */
-		{ "%ad" },	/* author date */
-		{ "%aD" },	/* author date, RFC2822 style */
-		{ "%ar" },	/* author date, relative */
-		{ "%at" },	/* author date, UNIX timestamp */
-		{ "%ai" },	/* author date, ISO 8601 */
-		{ "%cn" },	/* committer name */
-		{ "%ce" },	/* committer email */
-		{ "%cd" },	/* committer date */
-		{ "%cD" },	/* committer date, RFC2822 style */
-		{ "%cr" },	/* committer date, relative */
-		{ "%ct" },	/* committer date, UNIX timestamp */
-		{ "%ci" },	/* committer date, ISO 8601 */
-		{ "%e" },	/* encoding */
-		{ "%s" },	/* subject */
-		{ "%b" },	/* body */
-		{ "%Cred" },	/* red */
-		{ "%Cgreen" },	/* green */
-		{ "%Cblue" },	/* blue */
-		{ "%Creset" },	/* reset color */
-		{ "%n" },	/* newline */
-		{ "%m" },	/* left/right/bottom */
-	};
-	enum interp_index {
-		IHASH = 0, IHASH_ABBREV,
-		ITREE, ITREE_ABBREV,
-		IPARENTS, IPARENTS_ABBREV,
-		IAUTHOR_NAME, IAUTHOR_EMAIL,
-		IAUTHOR_DATE, IAUTHOR_DATE_RFC2822, IAUTHOR_DATE_RELATIVE,
-		IAUTHOR_TIMESTAMP, IAUTHOR_ISO8601,
-		ICOMMITTER_NAME, ICOMMITTER_EMAIL,
-		ICOMMITTER_DATE, ICOMMITTER_DATE_RFC2822,
-		ICOMMITTER_DATE_RELATIVE, ICOMMITTER_TIMESTAMP,
-		ICOMMITTER_ISO8601,
-		IENCODING,
-		ISUBJECT,
-		IBODY,
-		IRED, IGREEN, IBLUE, IRESET_COLOR,
-		INEWLINE,
-		ILEFT_RIGHT,
-	};
+	const struct commit *commit = context;
 	struct commit_list *p;
-	char parents[1024];
-	unsigned long len;
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
 	const char *msg = commit->buffer;
 
-	if (ILEFT_RIGHT + 1 != ARRAY_SIZE(table))
-		die("invalid interp table!");
-
 	/* these are independent of the commit */
-	interp_set_entry(table, IRED, "\033[31m");
-	interp_set_entry(table, IGREEN, "\033[32m");
-	interp_set_entry(table, IBLUE, "\033[34m");
-	interp_set_entry(table, IRESET_COLOR, "\033[m");
-	interp_set_entry(table, INEWLINE, "\n");
+	switch (placeholder[0]) {
+	case 'C':
+		switch (placeholder[3]) {
+		case 'd':	/* red */
+			strbuf_addstr(sb, "\033[31m");
+			return;
+		case 'e':	/* green */
+			strbuf_addstr(sb, "\033[32m");
+			return;
+		case 'u':	/* blue */
+			strbuf_addstr(sb, "\033[34m");
+			return;
+		case 's':	/* reset color */
+			strbuf_addstr(sb, "\033[m");
+			return;
+		}
+	case 'n':		/* newline */
+		strbuf_addch(sb, '\n');
+		return;
+	}
 
 	/* these depend on the commit */
 	if (!commit->object.parsed)
 		parse_object(commit->object.sha1);
-	interp_set_entry(table, IHASH, sha1_to_hex(commit->object.sha1));
-	interp_set_entry(table, IHASH_ABBREV,
-			find_unique_abbrev(commit->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ITREE, sha1_to_hex(commit->tree->object.sha1));
-	interp_set_entry(table, ITREE_ABBREV,
-			find_unique_abbrev(commit->tree->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, ILEFT_RIGHT,
-			 (commit->object.flags & BOUNDARY)
-			 ? "-"
-			 : (commit->object.flags & SYMMETRIC_LEFT)
-			 ? "<"
-			 : ">");
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			sha1_to_hex(p->item->object.sha1));
-	interp_set_entry(table, IPARENTS, parents + 1);
-
-	parents[1] = 0;
-	for (i = 0, p = commit->parents;
-			p && i < sizeof(parents) - 1;
-			p = p->next)
-		i += snprintf(parents + i, sizeof(parents) - i - 1, " %s",
-			find_unique_abbrev(p->item->object.sha1,
-				DEFAULT_ABBREV));
-	interp_set_entry(table, IPARENTS_ABBREV, parents + 1);
 
+	switch (placeholder[0]) {
+	case 'H':		/* commit hash */
+		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
+		return;
+	case 'h':		/* abbreviated commit hash */
+		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
+		                                     DEFAULT_ABBREV));
+		return;
+	case 'T':		/* tree hash */
+		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
+		return;
+	case 't':		/* abbreviated tree hash */
+		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
+		                                     DEFAULT_ABBREV));
+		return;
+	case 'P':		/* parent hashes */
+		for (p = commit->parents; p; p = p->next) {
+			if (p != commit->parents)
+				strbuf_addch(sb, ' ');
+			strbuf_addstr(sb, sha1_to_hex(p->item->object.sha1));
+		}
+		return;
+	case 'p':		/* abbreviated parent hashes */
+		for (p = commit->parents; p; p = p->next) {
+			if (p != commit->parents)
+				strbuf_addch(sb, ' ');
+			strbuf_addstr(sb, find_unique_abbrev(
+					p->item->object.sha1, DEFAULT_ABBREV));
+		}
+		return;
+	case 'm':		/* left/right/bottom */
+		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
+		                 ? '-'
+		                 : (commit->object.flags & SYMMETRIC_LEFT)
+		                 ? '<'
+		                 : '>');
+		return;
+	}
+
+	/* For the rest we have to parse the commit header. */
 	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
 		int eol;
 		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
 			; /* do nothing */
 
 		if (state == SUBJECT) {
-			table[ISUBJECT].value = xmemdupz(msg + i, eol - i);
+			if (placeholder[0] == 's') {
+				strbuf_add(sb, msg + i, eol - i);
+				return;
+			}
 			i = eol;
 		}
 		if (i == eol) {
@@ -453,29 +446,66 @@ void format_commit_message(const struct commit *commit,
 			/* strip empty lines */
 			while (msg[eol + 1] == '\n')
 				eol++;
-		} else if (!prefixcmp(msg + i, "author "))
-			fill_person(table + IAUTHOR_NAME,
-					msg + i + 7, eol - i - 7);
-		else if (!prefixcmp(msg + i, "committer "))
-			fill_person(table + ICOMMITTER_NAME,
-					msg + i + 10, eol - i - 10);
-		else if (!prefixcmp(msg + i, "encoding "))
-			table[IENCODING].value =
-				xmemdupz(msg + i + 9, eol - i - 9);
+		} else if (!prefixcmp(msg + i, "author ")) {
+			if (placeholder[0] == 'a') {
+				format_person_part(sb, placeholder[1],
+				                   msg + i + 7, eol - i - 7);
+				return;
+			}
+		} else if (!prefixcmp(msg + i, "committer ")) {
+			if (placeholder[0] == 'c') {
+				format_person_part(sb, placeholder[1],
+				                   msg + i + 10, eol - i - 10);
+				return;
+			}
+		} else if (!prefixcmp(msg + i, "encoding ")) {
+			if (placeholder[0] == 'e') {
+				strbuf_add(sb, msg + i + 9, eol - i - 9);
+				return;
+			}
+		}
 		i = eol;
 	}
-	if (msg[i])
-		table[IBODY].value = xstrdup(msg + i);
-
-	len = interpolate(sb->buf + sb->len, strbuf_avail(sb),
-				format, table, ARRAY_SIZE(table));
-	if (len > strbuf_avail(sb)) {
-		strbuf_grow(sb, len);
-		interpolate(sb->buf + sb->len, strbuf_avail(sb) + 1,
-					format, table, ARRAY_SIZE(table));
-	}
-	strbuf_setlen(sb, sb->len + len);
-	interp_clear_table(table, ARRAY_SIZE(table));
+	if (msg[i] && placeholder[0] == 'b')	/* body */
+		strbuf_addstr(sb, msg + i);
+}
+
+void format_commit_message(const struct commit *commit,
+                           const void *format, struct strbuf *sb)
+{
+	const char *placeholders[] = {
+		"H",		/* commit hash */
+		"h",		/* abbreviated commit hash */
+		"T",		/* tree hash */
+		"t",		/* abbreviated tree hash */
+		"P",		/* parent hashes */
+		"p",		/* abbreviated parent hashes */
+		"an",		/* author name */
+		"ae",		/* author email */
+		"ad",		/* author date */
+		"aD",		/* author date, RFC2822 style */
+		"ar",		/* author date, relative */
+		"at",		/* author date, UNIX timestamp */
+		"ai",		/* author date, ISO 8601 */
+		"cn",		/* committer name */
+		"ce",		/* committer email */
+		"cd",		/* committer date */
+		"cD",		/* committer date, RFC2822 style */
+		"cr",		/* committer date, relative */
+		"ct",		/* committer date, UNIX timestamp */
+		"ci",		/* committer date, ISO 8601 */
+		"e",		/* encoding */
+		"s",		/* subject */
+		"b",		/* body */
+		"Cred",		/* red */
+		"Cgreen",	/* green */
+		"Cblue",	/* blue */
+		"Creset",	/* reset color */
+		"n",		/* newline */
+		"m",		/* left/right/bottom */
+		NULL
+	};
+	strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
 }
 
 static void pp_header(enum cmit_fmt fmt,
diff --git a/strbuf.c b/strbuf.c
index f4201e160d..536b43204e 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -129,6 +129,30 @@ void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 	strbuf_setlen(sb, sb->len + len);
 }
 
+void strbuf_expand(struct strbuf *sb, const char *format,
+                   const char **placeholders, expand_fn_t fn, void *context)
+{
+	for (;;) {
+		const char *percent, **p;
+
+		percent = strchrnul(format, '%');
+		strbuf_add(sb, format, percent - format);
+		if (!*percent)
+			break;
+		format = percent + 1;
+
+		for (p = placeholders; *p; p++) {
+			if (!prefixcmp(format, *p))
+				break;
+		}
+		if (*p) {
+			fn(sb, *p, context);
+			format += strlen(*p);
+		} else
+			strbuf_addch(sb, '%');
+	}
+}
+
 size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f)
 {
 	size_t res;
diff --git a/strbuf.h b/strbuf.h
index cd7f295b65..21d8944154 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -102,6 +102,9 @@ static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 	strbuf_add(sb, sb2->buf, sb2->len);
 }
 
+typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
+extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
+
 __attribute__((format(printf,2,3)))
 extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
 
-- 
cgit v1.2.3


From f29d59586c2a1666d18776cca5d96752dec0e8a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <rene.scharfe@lsrfire.ath.cx>
Date: Sat, 10 Nov 2007 12:14:20 +0100
Subject: --pretty=format: parse commit message only once

As Jeff King pointed out, some placeholder expansions are related to
each other: the steps to calculate one go most of the way towards
calculating the other, too.

This patch makes format_commit_message() parse the commit message
only once, remembering the position of each item.  This speeds up
handling of format strings containing multiple placeholders from the
set %s, %a*, %c*, %e, %b.

Here are the timings for the git version in next.  The first one is
to estimate the overhead of the caching, the second one is taken
from http://svn.tue.mpg.de/tentakel/trunk/tentakel/Makefile as an
example of a format string found in the wild.  The times are the
fastest of three consecutive runs in each case:

$ time git log --pretty=format:%e >/dev/null

real    0m0.381s
user    0m0.340s
sys     0m0.024s

$ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null

real    0m0.623s
user    0m0.556s
sys     0m0.052s

And here the times with this patch:

$ time git log --pretty=format:%e >/dev/null

real    0m0.385s
user    0m0.332s
sys     0m0.040s

$ time git log --pretty=format:"* %cd %cn%n%n%s%n%b" >/dev/null

real    0m0.563s
user    0m0.504s
sys     0m0.048s

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pretty.c | 124 +++++++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 82 insertions(+), 42 deletions(-)

diff --git a/pretty.c b/pretty.c
index 9fbd73f748..17a3010a6e 100644
--- a/pretty.c
+++ b/pretty.c
@@ -354,14 +354,67 @@ static void format_person_part(struct strbuf *sb, char part,
 	}
 }
 
-static void format_commit_item(struct strbuf *sb, const char *placeholder,
-                               void *context)
+struct chunk {
+	size_t off;
+	size_t len;
+};
+
+struct format_commit_context {
+	const struct commit *commit;
+
+	/* These offsets are relative to the start of the commit message. */
+	int commit_header_parsed;
+	struct chunk subject;
+	struct chunk author;
+	struct chunk committer;
+	struct chunk encoding;
+	size_t body_off;
+};
+
+static void parse_commit_header(struct format_commit_context *context)
 {
-	const struct commit *commit = context;
-	struct commit_list *p;
+	const char *msg = context->commit->buffer;
 	int i;
 	enum { HEADER, SUBJECT, BODY } state;
+
+	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
+		int eol;
+		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
+			; /* do nothing */
+
+		if (state == SUBJECT) {
+			context->subject.off = i;
+			context->subject.len = eol - i;
+			i = eol;
+		}
+		if (i == eol) {
+			state++;
+			/* strip empty lines */
+			while (msg[eol + 1] == '\n')
+				eol++;
+		} else if (!prefixcmp(msg + i, "author ")) {
+			context->author.off = i + 7;
+			context->author.len = eol - i - 7;
+		} else if (!prefixcmp(msg + i, "committer ")) {
+			context->committer.off = i + 10;
+			context->committer.len = eol - i - 10;
+		} else if (!prefixcmp(msg + i, "encoding ")) {
+			context->encoding.off = i + 9;
+			context->encoding.len = eol - i - 9;
+		}
+		i = eol;
+	}
+	context->body_off = i;
+	context->commit_header_parsed = 1;
+}
+
+static void format_commit_item(struct strbuf *sb, const char *placeholder,
+                               void *context)
+{
+	struct format_commit_context *c = context;
+	const struct commit *commit = c->commit;
 	const char *msg = commit->buffer;
+	struct commit_list *p;
 
 	/* these are independent of the commit */
 	switch (placeholder[0]) {
@@ -429,45 +482,28 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 	}
 
 	/* For the rest we have to parse the commit header. */
-	for (i = 0, state = HEADER; msg[i] && state < BODY; i++) {
-		int eol;
-		for (eol = i; msg[eol] && msg[eol] != '\n'; eol++)
-			; /* do nothing */
+	if (!c->commit_header_parsed)
+		parse_commit_header(c);
 
-		if (state == SUBJECT) {
-			if (placeholder[0] == 's') {
-				strbuf_add(sb, msg + i, eol - i);
-				return;
-			}
-			i = eol;
-		}
-		if (i == eol) {
-			state++;
-			/* strip empty lines */
-			while (msg[eol + 1] == '\n')
-				eol++;
-		} else if (!prefixcmp(msg + i, "author ")) {
-			if (placeholder[0] == 'a') {
-				format_person_part(sb, placeholder[1],
-				                   msg + i + 7, eol - i - 7);
-				return;
-			}
-		} else if (!prefixcmp(msg + i, "committer ")) {
-			if (placeholder[0] == 'c') {
-				format_person_part(sb, placeholder[1],
-				                   msg + i + 10, eol - i - 10);
-				return;
-			}
-		} else if (!prefixcmp(msg + i, "encoding ")) {
-			if (placeholder[0] == 'e') {
-				strbuf_add(sb, msg + i + 9, eol - i - 9);
-				return;
-			}
-		}
-		i = eol;
+	switch (placeholder[0]) {
+	case 's':
+		strbuf_add(sb, msg + c->subject.off, c->subject.len);
+		return;
+	case 'a':
+		format_person_part(sb, placeholder[1],
+		                   msg + c->author.off, c->author.len);
+		return;
+	case 'c':
+		format_person_part(sb, placeholder[1],
+		                   msg + c->committer.off, c->committer.len);
+		return;
+	case 'e':
+		strbuf_add(sb, msg + c->encoding.off, c->encoding.len);
+		return;
+	case 'b':
+		strbuf_addstr(sb, msg + c->body_off);
+		return;
 	}
-	if (msg[i] && placeholder[0] == 'b')	/* body */
-		strbuf_addstr(sb, msg + i);
 }
 
 void format_commit_message(const struct commit *commit,
@@ -505,7 +541,11 @@ void format_commit_message(const struct commit *commit,
 		"m",		/* left/right/bottom */
 		NULL
 	};
-	strbuf_expand(sb, format, placeholders, format_commit_item, (void *)commit);
+	struct format_commit_context context;
+
+	memset(&context, 0, sizeof(context));
+	context.commit = commit;
+	strbuf_expand(sb, format, placeholders, format_commit_item, &context);
 }
 
 static void pp_header(enum cmit_fmt fmt,
-- 
cgit v1.2.3


From 91db267ec849279053cf3ac3066c2f2c11db4321 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <rene.scharfe@lsrfire.ath.cx>
Date: Sat, 10 Nov 2007 12:16:05 +0100
Subject: add strbuf_adddup()

Add a new function, strbuf_adddup(), that appends a duplicate of a
part of a struct strbuf to end of the latter.

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 strbuf.c | 7 +++++++
 strbuf.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/strbuf.c b/strbuf.c
index 536b43204e..dbd8c4bcfb 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -106,6 +106,13 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len)
 	strbuf_setlen(sb, sb->len + len);
 }
 
+void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
+{
+	strbuf_grow(sb, len);
+	memcpy(sb->buf + sb->len, sb->buf + pos, len);
+	strbuf_setlen(sb, sb->len + len);
+}
+
 void strbuf_addf(struct strbuf *sb, const char *fmt, ...)
 {
 	int len;
diff --git a/strbuf.h b/strbuf.h
index 21d8944154..13919123dc 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -101,6 +101,7 @@ static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
 static inline void strbuf_addbuf(struct strbuf *sb, struct strbuf *sb2) {
 	strbuf_add(sb, sb2->buf, sb2->len);
 }
+extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len);
 
 typedef void (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context);
 extern void strbuf_expand(struct strbuf *sb, const char *format, const char **placeholders, expand_fn_t fn, void *context);
-- 
cgit v1.2.3


From b9c62321380374d09cc99570cdd1390674532832 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <rene.scharfe@lsrfire.ath.cx>
Date: Sat, 10 Nov 2007 12:18:26 +0100
Subject: --format=pretty: avoid calculating expensive expansions twice

As Jeff King remarked, format strings with duplicate placeholders can
be slow to expand, because each instance is calculated anew.

This patch makes use of the fact that format_commit_message() and its
helper functions only ever add stuff to the end of the strbuf.  For
certain expensive placeholders, store the offset and length of their
expansion with the strbuf at the first occurrence.  Later they
expansion result can simply be copied from there -- no malloc() or
strdup() required.

These certain placeholders are the abbreviated commit, tree and
parent hashes, as the search for a unique abbreviated hash is quite
costly.  Here are the times for next (best of three runs):

$ time git log --pretty=format:%h >/dev/null

real    0m0.611s
user    0m0.404s
sys     0m0.204s

$ time git log --pretty=format:%h%h%h%h >/dev/null

real    0m1.206s
user    0m0.744s
sys     0m0.452s

And here those with this patch (and the previous two); the speedup
of the single placeholder case is just noise:

$ time git log --pretty=format:%h >/dev/null

real    0m0.608s
user    0m0.416s
sys     0m0.192s

$ time git log --pretty=format:%h%h%h%h >/dev/null

real    0m0.639s
user    0m0.488s
sys     0m0.140s

Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 pretty.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/pretty.c b/pretty.c
index 17a3010a6e..9db75b4e4f 100644
--- a/pretty.c
+++ b/pretty.c
@@ -369,8 +369,30 @@ struct format_commit_context {
 	struct chunk committer;
 	struct chunk encoding;
 	size_t body_off;
+
+	/* The following ones are relative to the result struct strbuf. */
+	struct chunk abbrev_commit_hash;
+	struct chunk abbrev_tree_hash;
+	struct chunk abbrev_parent_hashes;
 };
 
+static int add_again(struct strbuf *sb, struct chunk *chunk)
+{
+	if (chunk->len) {
+		strbuf_adddup(sb, chunk->off, chunk->len);
+		return 1;
+	}
+
+	/*
+	 * We haven't seen this chunk before.  Our caller is surely
+	 * going to add it the hard way now.  Remember the most likely
+	 * start of the to-be-added chunk: the current end of the
+	 * struct strbuf.
+	 */
+	chunk->off = sb->len;
+	return 0;
+}
+
 static void parse_commit_header(struct format_commit_context *context)
 {
 	const char *msg = context->commit->buffer;
@@ -447,15 +469,21 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		strbuf_addstr(sb, sha1_to_hex(commit->object.sha1));
 		return;
 	case 'h':		/* abbreviated commit hash */
+		if (add_again(sb, &c->abbrev_commit_hash))
+			return;
 		strbuf_addstr(sb, find_unique_abbrev(commit->object.sha1,
 		                                     DEFAULT_ABBREV));
+		c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
 		return;
 	case 'T':		/* tree hash */
 		strbuf_addstr(sb, sha1_to_hex(commit->tree->object.sha1));
 		return;
 	case 't':		/* abbreviated tree hash */
+		if (add_again(sb, &c->abbrev_tree_hash))
+			return;
 		strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.sha1,
 		                                     DEFAULT_ABBREV));
+		c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
 		return;
 	case 'P':		/* parent hashes */
 		for (p = commit->parents; p; p = p->next) {
@@ -465,12 +493,16 @@ static void format_commit_item(struct strbuf *sb, const char *placeholder,
 		}
 		return;
 	case 'p':		/* abbreviated parent hashes */
+		if (add_again(sb, &c->abbrev_parent_hashes))
+			return;
 		for (p = commit->parents; p; p = p->next) {
 			if (p != commit->parents)
 				strbuf_addch(sb, ' ');
 			strbuf_addstr(sb, find_unique_abbrev(
 					p->item->object.sha1, DEFAULT_ABBREV));
 		}
+		c->abbrev_parent_hashes.len = sb->len -
+		                              c->abbrev_parent_hashes.off;
 		return;
 	case 'm':		/* left/right/bottom */
 		strbuf_addch(sb, (commit->object.flags & BOUNDARY)
-- 
cgit v1.2.3


From 9e79f00f06a5500b30941e6925adda070504e6cf Mon Sep 17 00:00:00 2001
From: Andreas Ericsson <ae@op5.se>
Date: Sat, 10 Nov 2007 12:55:48 +0100
Subject: Simplify strchrnul() compat code

strchrnul() was introduced in glibc in April 1999 and included in
glibc-2.1. Checking for that version means the majority of all git
users would get to use the optimized version in glibc. Of the
remaining few some might get to use a slightly slower version
than necessary but probably not slower than what we have today.

Unfortunately, __GLIBC_PREREQ() macro was not available in glibc 2.1.1
which was short lived but already supported strchrnul().  Odd minority
users of that library needs to live with our compatibility inline version.

Rediffed-against-next-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile           | 13 -------------
 compat/strchrnul.c |  8 --------
 git-compat-util.h  |  9 +++++++--
 3 files changed, 7 insertions(+), 23 deletions(-)
 delete mode 100644 compat/strchrnul.c

diff --git a/Makefile b/Makefile
index 69dcbb227e..621270f623 100644
--- a/Makefile
+++ b/Makefile
@@ -30,8 +30,6 @@ all::
 #
 # Define NO_MEMMEM if you don't have memmem.
 #
-# Define NO_STRCHRNUL if you don't have strchrnul.
-#
 # Define NO_STRLCPY if you don't have strlcpy.
 #
 # Define NO_STRTOUMAX if you don't have strtoumax in the C library.
@@ -408,7 +406,6 @@ ifeq ($(uname_S),Darwin)
 	OLD_ICONV = UnfortunatelyYes
 	NO_STRLCPY = YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 endif
 ifeq ($(uname_S),SunOS)
 	NEEDS_SOCKET = YesPlease
@@ -416,7 +413,6 @@ ifeq ($(uname_S),SunOS)
 	SHELL_PATH = /bin/bash
 	NO_STRCASESTR = YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 	NO_HSTRERROR = YesPlease
 	ifeq ($(uname_R),5.8)
 		NEEDS_LIBICONV = YesPlease
@@ -442,7 +438,6 @@ ifeq ($(uname_O),Cygwin)
 	NO_D_INO_IN_DIRENT = YesPlease
 	NO_STRCASESTR = YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 	NO_SYMLINK_HEAD = YesPlease
 	NEEDS_LIBICONV = YesPlease
 	NO_FAST_WORKING_DIRECTORY = UnfortunatelyYes
@@ -457,14 +452,12 @@ endif
 ifeq ($(uname_S),FreeBSD)
 	NEEDS_LIBICONV = YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 	BASIC_CFLAGS += -I/usr/local/include
 	BASIC_LDFLAGS += -L/usr/local/lib
 endif
 ifeq ($(uname_S),OpenBSD)
 	NO_STRCASESTR = YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 	NEEDS_LIBICONV = YesPlease
 	BASIC_CFLAGS += -I/usr/local/include
 	BASIC_LDFLAGS += -L/usr/local/lib
@@ -480,7 +473,6 @@ endif
 ifeq ($(uname_S),AIX)
 	NO_STRCASESTR=YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 	NO_STRLCPY = YesPlease
 	NEEDS_LIBICONV=YesPlease
 endif
@@ -493,7 +485,6 @@ ifeq ($(uname_S),IRIX64)
 	NO_SETENV=YesPlease
 	NO_STRCASESTR=YesPlease
 	NO_MEMMEM = YesPlease
-	NO_STRCHRNUL = YesPlease
 	NO_STRLCPY = YesPlease
 	NO_SOCKADDR_STORAGE=YesPlease
 	SHELL_PATH=/usr/gnu/bin/bash
@@ -704,10 +695,6 @@ ifdef NO_MEMMEM
 	COMPAT_CFLAGS += -DNO_MEMMEM
 	COMPAT_OBJS += compat/memmem.o
 endif
-ifdef NO_STRCHRNUL
-	COMPAT_CFLAGS += -DNO_STRCHRNUL
-	COMPAT_OBJS += compat/strchrnul.o
-endif
 
 ifdef THREADED_DELTA_SEARCH
 	BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH
diff --git a/compat/strchrnul.c b/compat/strchrnul.c
deleted file mode 100644
index 51839feb6e..0000000000
--- a/compat/strchrnul.c
+++ /dev/null
@@ -1,8 +0,0 @@
-#include "../git-compat-util.h"
-
-char *gitstrchrnul(const char *s, int c)
-{
-	while (*s && *s != c)
-		s++;
-	return (char *)s;
-}
diff --git a/git-compat-util.h b/git-compat-util.h
index e72654bc40..92d79673f8 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -183,9 +183,14 @@ void *gitmemmem(const void *haystack, size_t haystacklen,
                 const void *needle, size_t needlelen);
 #endif
 
-#ifdef NO_STRCHRNUL
+#if !defined(__GLIBC_PREREQ) && !__GLIBC_PREREQ(2, 1)
 #define strchrnul gitstrchrnul
-char *gitstrchrnul(const char *s, int c);
+static inline char *gitstrchrnul(const char *s, int c)
+{
+	while (*s && *s != c)
+		s++;
+	return (char *)s;
+}
 #endif
 
 extern void release_pack_memory(size_t, int);
-- 
cgit v1.2.3


From 726c8ef5a5a129d8157d0043f60fe7195d2cdb77 Mon Sep 17 00:00:00 2001
From: Johannes Sixt <j.sixt@viscovery.net>
Date: Mon, 12 Nov 2007 11:09:05 +0100
Subject: Fix preprocessor logic that determines the availablity of
 strchrnul().

Apart from the error in the condition (&& should actually be ||), the
construct

    #if !defined(A) || !A

leads to a syntax error in the C preprocessor if A is indeed not defined.

Tested-by: David Symonds <dsymonds@gmail.com>
Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 git-compat-util.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index 92d79673f8..ede9408bbd 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -183,7 +183,13 @@ void *gitmemmem(const void *haystack, size_t haystacklen,
                 const void *needle, size_t needlelen);
 #endif
 
-#if !defined(__GLIBC_PREREQ) && !__GLIBC_PREREQ(2, 1)
+#ifdef __GLIBC_PREREQ
+#if __GLIBC_PREREQ(2, 1)
+#define HAVE_STRCHRNUL
+#endif
+#endif
+
+#ifndef HAVE_STRCHRNUL
 #define strchrnul gitstrchrnul
 static inline char *gitstrchrnul(const char *s, int c)
 {
-- 
cgit v1.2.3