22 files changed, 742 insertions, 11 deletions
diff --git a/t/perf/README b/t/perf/README
index 49ea4349be..21321a0f36 100644
--- a/t/perf/README
+++ b/t/perf/README
@@ -60,7 +60,22 @@ You can set the following variables (also in your config.mak):
 
     GIT_PERF_MAKE_OPTS
 	Options to use when automatically building a git tree for
-	performance testing.  E.g., -j6 would be useful.
+	performance testing. E.g., -j6 would be useful. Passed
+	directly to make as "make $GIT_PERF_MAKE_OPTS".
+
+    GIT_PERF_MAKE_COMMAND
+	An arbitrary command that'll be run in place of the make
+	command. If set, the GIT_PERF_MAKE_OPTS variable is
+	ignored. Useful in cases where source tree changes might
+	require issuing a different make command to different
+	revisions.
+
+	This can be (ab)used to monkeypatch or otherwise change the
+	tree about to be built. Note that the build directory can be
+	re-used for subsequent runs so the make command might get
+	executed multiple times on the same tree, but don't count on
+	any of that, that's an implementation detail that might change
+	in the future.
 
     GIT_PERF_REPO
     GIT_PERF_LARGE_REPO
@@ -106,6 +121,7 @@ sources perf-lib.sh:
 
 After that you will want to use some of the following:
 
+	test_perf_fresh_repo    # sets up an empty repository
 	test_perf_default_repo  # sets up a "normal" repository
 	test_perf_large_repo    # sets up a "large" repository
 
diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl
index 924b19dab4..1dbc85b214 100755
--- a/t/perf/aggregate.perl
+++ b/t/perf/aggregate.perl
@@ -88,6 +88,7 @@ for my $t (@tests) {
 sub read_descr {
 	my $name = shift;
 	open my $fh, "<", $name or return "<error reading description>";
+	binmode $fh, ":utf8" or die "PANIC on binmode: $!";
 	my $line = <$fh>;
 	close $fh or die "cannot close $name";
 	chomp $line;
@@ -147,6 +148,8 @@ for my $t (@subtests) {
 my $totalwidth = 3*@dirs+$descrlen;
 $totalwidth += $_ for (@colwidth);
 
+binmode STDOUT, ":utf8" or die "PANIC on binmode: $!";
+
 printf "%-${descrlen}s", "Test";
 for my $i (0..$#dirs) {
 	my $d = $dirs[$i];
diff --git a/t/perf/p0000-perf-lib-sanity.sh b/t/perf/p0000-perf-lib-sanity.sh
index cf8e1efce7..002c21e52a 100755
--- a/t/perf/p0000-perf-lib-sanity.sh
+++ b/t/perf/p0000-perf-lib-sanity.sh
@@ -33,6 +33,8 @@ test_perf 'export a weird var' '
 	test_export bar
 '
 
+test_perf 'éḿíẗ ńöń-ÁŚĆÍÍ ćḧáŕáćẗéŕś' 'true'
+
 test_expect_success 'test_export works with weird vars' '
 	echo "$bar" &&
 	test "$bar" = "weird # variable"
diff --git a/t/perf/p0001-rev-list.sh b/t/perf/p0001-rev-list.sh
index 16359d51ae..ebf172401b 100755
--- a/t/perf/p0001-rev-list.sh
+++ b/t/perf/p0001-rev-list.sh
@@ -15,7 +15,8 @@ test_perf 'rev-list --all --objects' '
 '
 
 test_expect_success 'create new unreferenced commit' '
-	commit=$(git commit-tree HEAD^{tree} -p HEAD)
+	commit=$(git commit-tree HEAD^{tree} -p HEAD) &&
+	test_export commit
 '
 
 test_perf 'rev-list $commit --not --all' '
diff --git a/t/perf/p0004-lazy-init-name-hash.sh b/t/perf/p0004-lazy-init-name-hash.sh
new file mode 100755
index 0000000000..8de5a98cfc
--- /dev/null
+++ b/t/perf/p0004-lazy-init-name-hash.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+test_description='Tests multi-threaded lazy_init_name_hash'
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+test_expect_success 'verify both methods build the same hashmaps' '
+	test-lazy-init-name-hash --dump --single >out.single &&
+	if test-lazy-init-name-hash --dump --multi >out.multi
+	then
+		test_set_prereq REPO_BIG_ENOUGH_FOR_MULTI &&
+		sort <out.single >sorted.single &&
+		sort <out.multi >sorted.multi &&
+		test_cmp sorted.single sorted.multi
+	fi
+'
+
+test_expect_success 'calibrate' '
+	entries=$(wc -l <out.single) &&
+
+	case $entries in
+	?) count=1000000 ;;
+	??) count=100000 ;;
+	???) count=10000 ;;
+	????) count=1000 ;;
+	?????) count=100 ;;
+	??????) count=10 ;;
+	*) count=1 ;;
+	esac &&
+	export count &&
+
+	case $entries in
+	1) entries_desc="1 entry" ;;
+	*) entries_desc="$entries entries" ;;
+	esac &&
+
+	case $count in
+	1) count_desc="1 round" ;;
+	*) count_desc="$count rounds" ;;
+	esac &&
+
+	desc="$entries_desc, $count_desc" &&
+	export desc
+'
+
+test_perf "single-threaded, $desc" "
+	test-lazy-init-name-hash --single --count=$count
+"
+
+test_perf REPO_BIG_ENOUGH_FOR_MULTI "multi-threaded, $desc" "
+	test-lazy-init-name-hash --multi --count=$count
+"
+
+test_done
diff --git a/t/perf/p0005-status.sh b/t/perf/p0005-status.sh
new file mode 100755
index 0000000000..0b0aa9858f
--- /dev/null
+++ b/t/perf/p0005-status.sh
@@ -0,0 +1,49 @@
+#!/bin/sh
+#
+# This test measures the performance of various read-tree
+# and status operations.  It is primarily interested in
+# the algorithmic costs of index operations and recursive
+# tree traversal -- and NOT disk I/O on thousands of files.
+
+test_description="Tests performance of read-tree"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# If the test repo was generated by ./repos/many-files.sh
+# then we know something about the data shape and branches,
+# so we can isolate testing to the ballast-related commits
+# and setup sparse-checkout so we don't have to populate
+# the ballast files and directories.
+#
+# Otherwise, we make some general assumptions about the
+# repo and consider the entire history of the current
+# branch to be the ballast.
+
+test_expect_success "setup repo" '
+	if git rev-parse --verify refs/heads/p0006-ballast^{commit}
+	then
+		echo Assuming synthetic repo from many-files.sh &&
+		git branch br_base            master &&
+		git branch br_ballast         p0006-ballast &&
+		git config --local core.sparsecheckout 1 &&
+		cat >.git/info/sparse-checkout <<-EOF
+		/*
+		!ballast/*
+		EOF
+	else
+		echo Assuming non-synthetic repo... &&
+		git branch br_base            $(git rev-list HEAD | tail -n 1) &&
+		git branch br_ballast         HEAD
+	fi &&
+	git checkout -q br_ballast &&
+	nr_files=$(git ls-files | wc -l)
+'
+
+test_perf "read-tree status br_ballast ($nr_files)" '
+	git read-tree HEAD &&
+	git status
+'
+
+test_done
diff --git a/t/perf/p0006-read-tree-checkout.sh b/t/perf/p0006-read-tree-checkout.sh
new file mode 100755
index 0000000000..78cc23fe2f
--- /dev/null
+++ b/t/perf/p0006-read-tree-checkout.sh
@@ -0,0 +1,67 @@
+#!/bin/sh
+#
+# This test measures the performance of various read-tree
+# and checkout operations.  It is primarily interested in
+# the algorithmic costs of index operations and recursive
+# tree traversal -- and NOT disk I/O on thousands of files.
+
+test_description="Tests performance of read-tree"
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+# If the test repo was generated by ./repos/many-files.sh
+# then we know something about the data shape and branches,
+# so we can isolate testing to the ballast-related commits
+# and setup sparse-checkout so we don't have to populate
+# the ballast files and directories.
+#
+# Otherwise, we make some general assumptions about the
+# repo and consider the entire history of the current
+# branch to be the ballast.
+
+test_expect_success "setup repo" '
+	if git rev-parse --verify refs/heads/p0006-ballast^{commit}
+	then
+		echo Assuming synthetic repo from many-files.sh &&
+		git branch br_base            master &&
+		git branch br_ballast         p0006-ballast^ &&
+		git branch br_ballast_alias   p0006-ballast^ &&
+		git branch br_ballast_plus_1  p0006-ballast &&
+		git config --local core.sparsecheckout 1 &&
+		cat >.git/info/sparse-checkout <<-EOF
+		/*
+		!ballast/*
+		EOF
+	else
+		echo Assuming non-synthetic repo... &&
+		git branch br_base            $(git rev-list HEAD | tail -n 1) &&
+		{ git branch br_ballast HEAD^ || error "no ancestor commit from current head"; } &&
+		git branch br_ballast_alias   HEAD^ &&
+		git branch br_ballast_plus_1  HEAD
+	fi &&
+	git checkout -q br_ballast &&
+	nr_files=$(git ls-files | wc -l)
+'
+
+test_perf "read-tree br_base br_ballast ($nr_files)" '
+	git read-tree -m br_base br_ballast -n
+'
+
+test_perf "switch between br_base br_ballast ($nr_files)" '
+	git checkout -q br_base &&
+	git checkout -q br_ballast
+'
+
+test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" '
+	git checkout -q br_ballast_plus_1 &&
+	git checkout -q br_ballast
+'
+
+test_perf "switch between aliases ($nr_files)" '
+	git checkout -q br_ballast_alias &&
+	git checkout -q br_ballast
+'
+
+test_done
diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh
new file mode 100755
index 0000000000..7c9a35a646
--- /dev/null
+++ b/t/perf/p0071-sort.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+test_description='Basic sort performance tests'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+test_expect_success 'setup' '
+	git ls-files --stage "*.[ch]" "*.sh" |
+	cut -f2 -d" " |
+	git cat-file --batch >unsorted
+'
+
+test_perf 'sort(1)' '
+	sort <unsorted >expect
+'
+
+test_perf 'string_list_sort()' '
+	test-string-list sort <unsorted >actual
+'
+
+test_expect_success 'string_list_sort() sorts like sort(1)' '
+	test_cmp_bin expect actual
+'
+
+test_done
diff --git a/t/perf/p0100-globbing.sh b/t/perf/p0100-globbing.sh
new file mode 100755
index 0000000000..dd18a9ce2b
--- /dev/null
+++ b/t/perf/p0100-globbing.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+test_description="Tests pathological globbing performance
+
+Shows how Git's globbing performs when given the sort of
+pathological patterns described at https://research.swtch.com/glob
+"
+
+. ./perf-lib.sh
+
+test_globs_big='10 25 50 75 100'
+test_globs_small='1 2 3 4 5 6'
+
+test_perf_fresh_repo
+
+test_expect_success 'setup' '
+	for i in $(test_seq 1 100)
+	do
+		printf "a" >>refname &&
+		for j in $(test_seq 1 $i)
+		do
+			printf "a*" >>refglob.$i || return 1
+		done &&
+		echo b >>refglob.$i || return 1
+	done &&
+	test_commit test $(cat refname).t "" $(cat refname).t
+'
+
+for i in $test_globs_small
+do
+	test_perf "refglob((a*)^nb) against tag (a^100).t; n = $i" '
+		git for-each-ref "refs/tags/$(cat refglob.'$i')b"
+	'
+done
+
+for i in $test_globs_small
+do
+	test_perf "fileglob((a*)^nb) against file (a^100).t; n = $i" '
+		git ls-files "$(cat refglob.'$i')b"
+	'
+done
+
+test_done
diff --git a/t/perf/p3400-rebase.sh b/t/perf/p3400-rebase.sh
index b3e7d525d2..ce271ca4c1 100755
--- a/t/perf/p3400-rebase.sh
+++ b/t/perf/p3400-rebase.sh
@@ -5,7 +5,7 @@ test_description='Tests rebase performance'
 
 test_perf_default_repo
 
-test_expect_success 'setup' '
+test_expect_success 'setup rebasing on top of a lot of changes' '
 	git checkout -f -b base &&
 	git checkout -b to-rebase &&
 	git checkout -b upstream &&
@@ -33,4 +33,24 @@ test_perf 'rebase on top of a lot of unrelated changes' '
 	git rebase --onto base HEAD^
 '
 
+test_expect_success 'setup rebasing many changes without split-index' '
+	git config core.splitIndex false &&
+	git checkout -b upstream2 to-rebase &&
+	git checkout -b to-rebase2 upstream
+'
+
+test_perf 'rebase a lot of unrelated changes without split-index' '
+	git rebase --onto upstream2 base &&
+	git rebase --onto base upstream2
+'
+
+test_expect_success 'setup rebasing many changes with split-index' '
+	git config core.splitIndex true
+'
+
+test_perf 'rebase a lot of unrelated changes with split-index' '
+	git rebase --onto upstream2 base &&
+	git rebase --onto base upstream2
+'
+
 test_done
diff --git a/t/perf/p4205-log-pretty-formats.sh b/t/perf/p4205-log-pretty-formats.sh
new file mode 100755
index 0000000000..7c26f4f337
--- /dev/null
+++ b/t/perf/p4205-log-pretty-formats.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+test_description='Tests the performance of various pretty format placeholders'
+
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+for format in %H %h %T %t %P %p %h-%h-%h
+do
+	test_perf "log with $format" "
+		git log --format=\"$format\" >/dev/null
+	"
+done
+
+test_done
diff --git a/t/perf/p4220-log-grep-engines.sh b/t/perf/p4220-log-grep-engines.sh
new file mode 100755
index 0000000000..2bc47ded4d
--- /dev/null
+++ b/t/perf/p4220-log-grep-engines.sh
@@ -0,0 +1,53 @@
+#!/bin/sh
+
+test_description="Comparison of git-log's --grep regex engines
+
+Set GIT_PERF_4220_LOG_OPTS in the environment to pass options to
+git-log. Make sure to include a leading space,
+e.g. GIT_PERF_4220_LOG_OPTS=' -i'. Some options to try:
+
+	-i
+	--invert-grep
+	-i --invert-grep
+"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+for pattern in \
+	'how.to' \
+	'^how to' \
+	'[how] to' \
+	'\(e.t[^ ]*\|v.ry\) rare' \
+	'm\(ú\|u\)lt.b\(æ\|y\)te'
+do
+	for engine in basic extended perl
+	do
+		if test $engine != "basic"
+		then
+			# Poor man's basic -> extended converter.
+			pattern=$(echo "$pattern" | sed 's/\\//g')
+		fi
+		if test $engine = "perl" && ! test_have_prereq PCRE
+		then
+			prereq="PCRE"
+		else
+			prereq=""
+		fi
+		test_perf $prereq "$engine log$GIT_PERF_4220_LOG_OPTS --grep='$pattern'" "
+			git -c grep.patternType=$engine log --pretty=format:%h$GIT_PERF_4220_LOG_OPTS --grep='$pattern' >'out.$engine' || :
+		"
+	done
+
+	test_expect_success "assert that all engines found the same for$GIT_PERF_4220_LOG_OPTS '$pattern'" '
+		test_cmp out.basic out.extended &&
+		if test_have_prereq PCRE
+		then
+			test_cmp out.basic out.perl
+		fi
+	'
+done
+
+test_done
diff --git a/t/perf/p4221-log-grep-engines-fixed.sh b/t/perf/p4221-log-grep-engines-fixed.sh
new file mode 100755
index 0000000000..060971265a
--- /dev/null
+++ b/t/perf/p4221-log-grep-engines-fixed.sh
@@ -0,0 +1,44 @@
+#!/bin/sh
+
+test_description="Comparison of git-log's --grep regex engines with -F
+
+Set GIT_PERF_4221_LOG_OPTS in the environment to pass options to
+git-log. Make sure to include a leading space,
+e.g. GIT_PERF_4221_LOG_OPTS=' -i'. Some options to try:
+
+	-i
+	--invert-grep
+	-i --invert-grep
+"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+for pattern in 'int' 'uncommon' 'æ'
+do
+	for engine in fixed basic extended perl
+	do
+		if test $engine = "perl" && ! test_have_prereq PCRE
+		then
+			prereq="PCRE"
+		else
+			prereq=""
+		fi
+		test_perf $prereq "$engine log$GIT_PERF_4221_LOG_OPTS --grep='$pattern'" "
+			git -c grep.patternType=$engine log --pretty=format:%h$GIT_PERF_4221_LOG_OPTS --grep='$pattern' >'out.$engine' || :
+		"
+	done
+
+	test_expect_success "assert that all engines found the same for$GIT_PERF_4221_LOG_OPTS '$pattern'" '
+		test_cmp out.fixed out.basic &&
+		test_cmp out.fixed out.extended &&
+		if test_have_prereq PCRE
+		then
+			test_cmp out.fixed out.perl
+		fi
+	'
+done
+
+test_done
diff --git a/t/perf/p5302-pack-index.sh b/t/perf/p5302-pack-index.sh
index 5ee9211f98..99bdb16c85 100755
--- a/t/perf/p5302-pack-index.sh
+++ b/t/perf/p5302-pack-index.sh
@@ -13,6 +13,13 @@ test_expect_success 'repack' '
 	export PACK
 '
 
+test_expect_success 'create target repositories' '
+	for repo in t1 t2 t3 t4 t5 t6
+	do
+		git init --bare $repo || return 1
+	done
+'
+
 test_perf 'index-pack 0 threads' '
 	GIT_DIR=t1 git index-pack --threads=1 --stdin < $PACK
 '
diff --git a/t/perf/p7000-filter-branch.sh b/t/perf/p7000-filter-branch.sh
index 15ee5d1d53..b029586ccb 100755
--- a/t/perf/p7000-filter-branch.sh
+++ b/t/perf/p7000-filter-branch.sh
@@ -16,4 +16,9 @@ test_perf 'noop filter' '
 	git filter-branch -f base..HEAD
 '
 
+test_perf 'noop prune-empty' '
+	git checkout --detach tip &&
+	git filter-branch -f --prune-empty base..HEAD
+'
+
 test_done
diff --git a/t/perf/p7820-grep-engines.sh b/t/perf/p7820-grep-engines.sh
new file mode 100755
index 0000000000..62aba19e76
--- /dev/null
+++ b/t/perf/p7820-grep-engines.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+
+test_description="Comparison of git-grep's regex engines
+
+Set GIT_PERF_7820_GREP_OPTS in the environment to pass options to
+git-grep. Make sure to include a leading space,
+e.g. GIT_PERF_7820_GREP_OPTS=' -i'. Some options to try:
+
+	-i
+	-w
+	-v
+	-vi
+	-vw
+	-viw
+"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+for pattern in \
+	'how.to' \
+	'^how to' \
+	'[how] to' \
+	'\(e.t[^ ]*\|v.ry\) rare' \
+	'm\(ú\|u\)lt.b\(æ\|y\)te'
+do
+	for engine in basic extended perl
+	do
+		if test $engine != "basic"
+		then
+			# Poor man's basic -> extended converter.
+			pattern=$(echo "$pattern" | sed 's/\\//g')
+		fi
+		if test $engine = "perl" && ! test_have_prereq PCRE
+		then
+			prereq="PCRE"
+		else
+			prereq=""
+		fi
+		test_perf $prereq "$engine grep$GIT_PERF_7820_GREP_OPTS '$pattern'" "
+			git -c grep.patternType=$engine grep$GIT_PERF_7820_GREP_OPTS -- '$pattern' >'out.$engine' || :
+		"
+	done
+
+	test_expect_success "assert that all engines found the same for$GIT_PERF_7820_GREP_OPTS '$pattern'" '
+		test_cmp out.basic out.extended &&
+		if test_have_prereq PCRE
+		then
+			test_cmp out.basic out.perl
+		fi
+	'
+done
+
+test_done
diff --git a/t/perf/p7821-grep-engines-fixed.sh b/t/perf/p7821-grep-engines-fixed.sh
new file mode 100755
index 0000000000..c7ef1e198f
--- /dev/null
+++ b/t/perf/p7821-grep-engines-fixed.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+test_description="Comparison of git-grep's regex engines with -F
+
+Set GIT_PERF_7821_GREP_OPTS in the environment to pass options to
+git-grep. Make sure to include a leading space,
+e.g. GIT_PERF_7821_GREP_OPTS=' -w'. See p7820-grep-engines.sh for more
+options to try.
+"
+
+. ./perf-lib.sh
+
+test_perf_large_repo
+test_checkout_worktree
+
+for pattern in 'int' 'uncommon' 'æ'
+do
+	for engine in fixed basic extended perl
+	do
+		if test $engine = "perl" && ! test_have_prereq PCRE
+		then
+			prereq="PCRE"
+		else
+			prereq=""
+		fi
+		test_perf $prereq "$engine grep$GIT_PERF_7821_GREP_OPTS $pattern" "
+			git -c grep.patternType=$engine grep$GIT_PERF_7821_GREP_OPTS $pattern >'out.$engine' || :
+		"
+	done
+
+	test_expect_success "assert that all engines found the same for$GIT_PERF_7821_GREP_OPTS $pattern" '
+		test_cmp out.fixed out.basic &&
+		test_cmp out.fixed out.extended &&
+		if test_have_prereq PCRE
+		then
+			test_cmp out.fixed out.perl
+		fi
+	'
+done
+
+test_done
diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh
index 46f08ee087..b50211b259 100644
--- a/t/perf/perf-lib.sh
+++ b/t/perf/perf-lib.sh
@@ -78,12 +78,16 @@ if test -z "$GIT_PERF_LARGE_REPO"; then
 	GIT_PERF_LARGE_REPO=$TEST_DIRECTORY/..
 fi
 
+test_perf_do_repo_symlink_config_ () {
+	test_have_prereq SYMLINKS || git config core.symlinks false
+}
+
 test_perf_create_repo_from () {
 	test "$#" = 2 ||
 	error "bug in the test script: not 2 parameters to test-create-repo"
 	repo="$1"
 	source="$2"
-	source_git="$(git -C "$source" rev-parse --git-dir)"
+	source_git="$("$MODERN_GIT" -C "$source" rev-parse --git-dir)"
 	objects_dir="$("$MODERN_GIT" -C "$source" rev-parse --git-path objects)"
 	mkdir -p "$repo/.git"
 	(
@@ -102,15 +106,29 @@ test_perf_create_repo_from () {
 	) &&
 	(
 		cd "$repo" &&
-		git init -q && {
-			test_have_prereq SYMLINKS ||
-			git config core.symlinks false
-		} &&
-		mv .git/hooks .git/hooks-disabled 2>/dev/null
+		"$MODERN_GIT" init -q &&
+		test_perf_do_repo_symlink_config_ &&
+		mv .git/hooks .git/hooks-disabled 2>/dev/null &&
+		if test -f .git/index.lock
+		then
+			# We may be copying a repo that can't run "git
+			# status" due to a locked index. Since we have
+			# a copy it's fine to remove the lock.
+			rm .git/index.lock
+		fi
 	) || error "failed to copy repository '$source' to '$repo'"
 }
 
 # call at least one of these to establish an appropriately-sized repository
+test_perf_fresh_repo () {
+	repo="${1:-$TRASH_DIRECTORY}"
+	"$MODERN_GIT" init -q "$repo" &&
+	(
+		cd "$repo" &&
+		test_perf_do_repo_symlink_config_
+	)
+}
+
 test_perf_default_repo () {
 	test_perf_create_repo_from "${1:-$TRASH_DIRECTORY}" "$GIT_PERF_REPO"
 }
diff --git a/t/perf/repos/.gitignore b/t/perf/repos/.gitignore
new file mode 100644
index 0000000000..72e3dc3e19
--- /dev/null
+++ b/t/perf/repos/.gitignore
@@ -0,0 +1 @@
+gen-*/
diff --git a/t/perf/repos/inflate-repo.sh b/t/perf/repos/inflate-repo.sh
new file mode 100755
index 0000000000..fcfc992b5b
--- /dev/null
+++ b/t/perf/repos/inflate-repo.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+# Inflate the size of an EXISTING repo.
+#
+# This script should be run inside the worktree of a TEST repo.
+# It will use the contents of the current HEAD to generate a
+# commit containing copies of the current worktree such that the
+# total size of the commit has at least <target_size> files.
+#
+# Usage: [-t target_size] [-b branch_name]
+
+set -e
+
+target_size=10000
+branch_name=p0006-ballast
+ballast=ballast
+
+while test "$#" -ne 0
+do
+    case "$1" in
+	-b)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -b requires an argument' >&2; exit 1; }
+	    branch_name=$1;
+	    shift ;;
+	-t)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -t requires an argument' >&2; exit 1; }
+	    target_size=$1;
+	    shift ;;
+	*)
+	    echo "error: unknown option '$1'" >&2; exit 1 ;;
+    esac
+done
+
+git ls-tree -r HEAD >GEN_src_list
+nr_src_files=$(cat GEN_src_list | wc -l)
+
+src_branch=$(git symbolic-ref --short HEAD)
+
+echo "Branch $src_branch initially has $nr_src_files files."
+
+if test $target_size -le $nr_src_files
+then
+    echo "Repository already exceeds target size $target_size."
+    rm GEN_src_list
+    exit 1
+fi
+
+# Create well-known branch and add 1 file change to start
+# it off before the ballast.
+git checkout -b $branch_name HEAD
+echo "$target_size" > inflate-repo.params
+git add inflate-repo.params
+git commit -q -m params
+
+# Create ballast in our branch.
+copy=1
+nr_files=$nr_src_files
+while test $nr_files -lt $target_size
+do
+    sed -e "s|	|	$ballast/$copy/|" <GEN_src_list |
+	git update-index --index-info
+
+    nr_files=$(expr $nr_files + $nr_src_files)
+    copy=$(expr $copy + 1)
+done
+rm GEN_src_list
+git commit -q -m "ballast"
+
+# Modify 1 file and commit.
+echo "$target_size" >> inflate-repo.params
+git add inflate-repo.params
+git commit -q -m "ballast plus 1"
+
+nr_files=$(git ls-files | wc -l)
+
+# Checkout $src_branch to put repo in canonical state (because
+# the perf test may need to clone and enable sparse-checkout
+# before attempting to checkout a commit with the ballast
+# (because it may contain 100K directories and 1M files)).
+git checkout $src_branch
+
+echo "Repository inflated. Branch $branch_name has $nr_files files."
+
+exit 0
diff --git a/t/perf/repos/many-files.sh b/t/perf/repos/many-files.sh
new file mode 100755
index 0000000000..28720e4e10
--- /dev/null
+++ b/t/perf/repos/many-files.sh
@@ -0,0 +1,110 @@
+#!/bin/sh
+# Generate test data repository using the given parameters.
+# When -r is omitted, we create "gen-many-files-<depth>.<width>.<files>.git".
+#
+# Usage: [-r repo] [-d depth] [-w width] [-f files]
+#
+# -r repo: path to the new repo to be generated
+# -d depth: the depth of sub-directories
+# -w width: the number of sub-directories at each level
+# -f files: the number of files created in each directory
+#
+# Note that all files will have the same SHA-1 and each
+# directory at a level will have the same SHA-1, so we
+# will potentially have a large index, but not a large
+# ODB.
+#
+# Ballast will be created under "ballast/".
+
+EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+
+set -e
+
+# (5, 10, 9) will create 999,999 ballast files.
+# (4, 10, 9) will create  99,999 ballast files.
+depth=5
+width=10
+files=9
+
+while test "$#" -ne 0
+do
+    case "$1" in
+	-r)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -r requires an argument' >&2; exit 1; }
+	    repo=$1;
+	    shift ;;
+	-d)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -d requires an argument' >&2; exit 1; }
+	    depth=$1;
+	    shift ;;
+	-w)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -w requires an argument' >&2; exit 1; }
+	    width=$1;
+	    shift ;;
+	-f)
+	    shift;
+	    test "$#" -ne 0 || { echo 'error: -f requires an argument' >&2; exit 1; }
+	    files=$1;
+	    shift ;;
+	*)
+	    echo "error: unknown option '$1'" >&2; exit 1 ;;
+	esac
+done
+
+# Inflate the index with thousands of empty files.
+# usage: dir depth width files
+fill_index() {
+	awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 '
+		function make_paths(dir, depth, width, files, f, w) {
+			for (f = 1; f <= files; f++) {
+				print dir "/file" f
+			}
+			if (depth > 0) {
+				for (w = 1; w <= width; w++) {
+					make_paths(dir "/dir" w, depth - 1, width, files)
+				}
+			}
+		}
+		END { make_paths(arg_dir, arg_depth, arg_width, arg_files) }
+		' </dev/null |
+	sed "s/^/100644 $EMPTY_BLOB	/" |
+	git update-index --index-info
+	return 0
+}
+
+[ -z "$repo" ] && repo=gen-many-files-$depth.$width.$files.git
+
+mkdir "$repo"
+cd "$repo"
+git init .
+
+# Create an initial commit just to define master.
+touch many-files.empty
+echo "$depth $width $files" >many-files.params
+git add many-files.*
+git commit -q -m params
+
+# Create ballast for p0006 based upon the given params and
+# inflate the index with thousands of empty files and commit.
+git checkout -b p0006-ballast
+fill_index "ballast" $depth $width $files
+git commit -q -m "ballast"
+
+nr_files=$(git ls-files | wc -l)
+
+# Modify 1 file and commit.
+echo "$depth $width $files" >>many-files.params
+git add many-files.params
+git commit -q -m "ballast plus 1"
+
+# Checkout master to put repo in canonical state (because
+# the perf test may need to clone and enable sparse-checkout
+# before attempting to checkout a commit with the ballast
+# (because it may contain 100K directories and 1M files)).
+git checkout master
+
+echo "Repository $repo ($depth, $width, $files) created.  Ballast $nr_files."
+exit 0
diff --git a/t/perf/run b/t/perf/run
index e8adedadfd..beb4acc0e4 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -24,6 +24,7 @@ run_one_dir () {
 
 unpack_git_rev () {
 	rev=$1
+	echo "=== Unpacking $rev in build/$rev ==="
 	mkdir -p build/$rev
 	(cd "$(git rev-parse --show-cdup)" && git archive --format=tar $rev) |
 	(cd build/$rev && tar x)
@@ -37,8 +38,16 @@ build_git_rev () {
 			cp "../../$config" "build/$rev/"
 		fi
 	done
-	(cd build/$rev && make $GIT_PERF_MAKE_OPTS) ||
-	die "failed to build revision '$mydir'"
+	echo "=== Building $rev ==="
+	(
+		cd build/$rev &&
+		if test -n "$GIT_PERF_MAKE_COMMAND"
+		then
+			sh -c "$GIT_PERF_MAKE_COMMAND"
+		else
+			make $GIT_PERF_MAKE_OPTS
+		fi
+	) || die "failed to build revision '$mydir'"
 }
 
 run_dirs_helper () {
@@ -63,6 +72,9 @@ run_dirs_helper () {
 		unset GIT_TEST_INSTALLED
 	else
 		GIT_TEST_INSTALLED="$mydir/bin-wrappers"
+		# Older versions of git lacked bin-wrappers; fallback to the
+		# files in the root.
+		test -d "$GIT_TEST_INSTALLED" || GIT_TEST_INSTALLED=$mydir
 		export GIT_TEST_INSTALLED
 	fi
 	run_one_dir "$@"