diff options
Diffstat (limited to 't/perf')
31 files changed, 1755 insertions, 143 deletions
diff --git a/t/perf/README b/t/perf/README index 49ea4349be..be12090c38 100644 --- a/t/perf/README +++ b/t/perf/README @@ -60,7 +60,22 @@ You can set the following variables (also in your config.mak): GIT_PERF_MAKE_OPTS Options to use when automatically building a git tree for - performance testing. E.g., -j6 would be useful. + performance testing. E.g., -j6 would be useful. Passed + directly to make as "make $GIT_PERF_MAKE_OPTS". + + GIT_PERF_MAKE_COMMAND + An arbitrary command that'll be run in place of the make + command, if set the GIT_PERF_MAKE_OPTS variable is + ignored. Useful in cases where source tree changes might + require issuing a different make command to different + revisions. + + This can be (ab)used to monkeypatch or otherwise change the + tree about to be built. Note that the build directory can be + re-used for subsequent runs so the make command might get + executed multiple times on the same tree, but don't count on + any of that, that's an implementation detail that might change + in the future. GIT_PERF_REPO GIT_PERF_LARGE_REPO @@ -106,6 +121,7 @@ sources perf-lib.sh: After that you will want to use some of the following: + test_perf_fresh_repo # sets up an empty repository test_perf_default_repo # sets up a "normal" repository test_perf_large_repo # sets up a "large" repository @@ -152,3 +168,28 @@ that While we have tried to make sure that it can cope with embedded whitespace and other special characters, it will not work with multi-line data. + +Rather than tracking the performance by run-time as `test_perf` does, you +may also track output size by using `test_size`. The stdout of the +function should be a single numeric value, which will be captured and +shown in the aggregated output. For example: + + test_perf 'time foo' ' + ./foo >foo.out + ' + + test_size 'output size' + wc -c <foo.out + ' + +might produce output like: + + Test origin HEAD + ------------------------------------------------------------- + 1234.1 time foo 0.37(0.79+0.02) 0.26(0.51+0.02) -29.7% + 1234.2 output size 4.3M 3.6M -14.7% + +The item being measured (and its units) is up to the test; the context +and the test title should make it clear to the user whether bigger or +smaller numbers are better. Unlike test_perf, the test code will only be +run once, since output sizes tend to be more deterministic than timings. diff --git a/t/perf/aggregate.perl b/t/perf/aggregate.perl index 924b19dab4..494907a892 100755 --- a/t/perf/aggregate.perl +++ b/t/perf/aggregate.perl @@ -1,8 +1,10 @@ #!/usr/bin/perl -use lib '../../perl/blib/lib'; +use lib '../../perl/build/lib'; use strict; use warnings; +use JSON; +use Getopt::Long; use Git; sub get_times { @@ -11,31 +13,89 @@ sub get_times { my $line = <$fh>; return undef if not defined $line; close $fh or die "cannot close $name: $!"; - $line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/ - or die "bad input line: $line"; - my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3; - return ($rt, $4, $5); + # times + if ($line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/) { + my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3; + return ($rt, $4, $5); + # size + } elsif ($line =~ /^\d+$/) { + return $&; + } else { + die "bad input line: $line"; + } +} + +sub relative_change { + my ($r, $firstr) = @_; + if ($firstr > 0) { + return sprintf "%+.1f%%", 100.0*($r-$firstr)/$firstr; + } elsif ($r == 0) { + return "="; + } else { + return "+inf"; + } } sub format_times { my ($r, $u, $s, $firstr) = @_; + # no value means we did not finish the test if (!defined $r) { return "<missing>"; } + # a single value means we have a size, not times + if (!defined $u) { + return format_size($r, $firstr); + } + # otherwise, we have real/user/system times my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s; - if (defined $firstr) { - if ($firstr > 0) { - $out .= sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr; - } elsif ($r == 0) { - $out .= " ="; - } else { - $out .= " +inf"; - } + $out .= ' ' . relative_change($r, $firstr) if defined $firstr; + return $out; +} + +sub usage { + print <<EOT; +./aggregate.perl [options] [--] [<dir_or_rev>...] [--] [<test_script>...] > + + Options: + --codespeed * Format output for Codespeed + --reponame <str> * Send given reponame to codespeed + --sort-by <str> * Sort output (only "regression" criteria is supported) + --subsection <str> * Use results from given subsection + +EOT + exit(1); +} + +sub human_size { + my $n = shift; + my @units = ('', qw(K M G)); + while ($n > 900 && @units > 1) { + $n /= 1000; + shift @units; } + return $n unless length $units[0]; + return sprintf '%.1f%s', $n, $units[0]; +} + +sub format_size { + my ($size, $first) = @_; + # match the width of a time: 0.00(0.00+0.00) + my $out = sprintf '%15s', human_size($size); + $out .= ' ' . relative_change($size, $first) if defined $first; return $out; } -my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests); +my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests, + $codespeed, $sortby, $subsection, $reponame); + +Getopt::Long::Configure qw/ require_order /; + +my $rc = GetOptions("codespeed" => \$codespeed, + "reponame=s" => \$reponame, + "sort-by=s" => \$sortby, + "subsection=s" => \$subsection); +usage() unless $rc; + while (scalar @ARGV) { my $arg = $ARGV[0]; my $dir; @@ -69,12 +129,24 @@ if (not @tests) { @tests = glob "p????-*.sh"; } +my $resultsdir = "test-results"; + +if (! $subsection and + exists $ENV{GIT_PERF_SUBSECTION} and + $ENV{GIT_PERF_SUBSECTION} ne "") { + $subsection = $ENV{GIT_PERF_SUBSECTION}; +} + +if ($subsection) { + $resultsdir .= "/" . $subsection; +} + my @subtests; my %shorttests; for my $t (@tests) { $t =~ s{(?:.*/)?(p(\d+)-[^/]+)\.sh$}{$1} or die "bad test name: $t"; my $n = $2; - my $fname = "test-results/$t.subtests"; + my $fname = "$resultsdir/$t.subtests"; open my $fp, "<", $fname or die "cannot open $fname: $!"; for (<$fp>) { chomp; @@ -88,19 +160,13 @@ for my $t (@tests) { sub read_descr { my $name = shift; open my $fh, "<", $name or return "<error reading description>"; + binmode $fh, ":utf8" or die "PANIC on binmode: $!"; my $line = <$fh>; close $fh or die "cannot close $name"; chomp $line; return $line; } -my %descrs; -my $descrlen = 4; # "Test" -for my $t (@subtests) { - $descrs{$t} = $shorttests{$t}.": ".read_descr("test-results/$t.descr"); - $descrlen = length $descrs{$t} if length $descrs{$t}>$descrlen; -} - sub have_duplicate { my %seen; for (@_) { @@ -116,52 +182,175 @@ sub have_slash { return 0; } -my %newdirabbrevs = %dirabbrevs; -while (!have_duplicate(values %newdirabbrevs)) { - %dirabbrevs = %newdirabbrevs; - last if !have_slash(values %dirabbrevs); - %newdirabbrevs = %dirabbrevs; - for (values %newdirabbrevs) { - s{^[^/]*/}{}; - } +sub display_dir { + my ($d) = @_; + return exists $dirabbrevs{$d} ? $dirabbrevs{$d} : $dirnames{$d}; } -my %times; -my @colwidth = ((0)x@dirs); -for my $i (0..$#dirs) { - my $d = $dirs[$i]; - my $w = length (exists $dirabbrevs{$d} ? $dirabbrevs{$d} : $dirnames{$d}); - $colwidth[$i] = $w if $w > $colwidth[$i]; -} -for my $t (@subtests) { - my $firstr; +sub print_default_results { + my %descrs; + my $descrlen = 4; # "Test" + for my $t (@subtests) { + $descrs{$t} = $shorttests{$t}.": ".read_descr("$resultsdir/$t.descr"); + $descrlen = length $descrs{$t} if length $descrs{$t}>$descrlen; + } + + my %newdirabbrevs = %dirabbrevs; + while (!have_duplicate(values %newdirabbrevs)) { + %dirabbrevs = %newdirabbrevs; + last if !have_slash(values %dirabbrevs); + %newdirabbrevs = %dirabbrevs; + for (values %newdirabbrevs) { + s{^[^/]*/}{}; + } + } + + my %times; + my @colwidth = ((0)x@dirs); for my $i (0..$#dirs) { - my $d = $dirs[$i]; - $times{$prefixes{$d}.$t} = [get_times("test-results/$prefixes{$d}$t.times")]; - my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}}; - my $w = length format_times($r,$u,$s,$firstr); + my $w = length display_dir($dirs[$i]); $colwidth[$i] = $w if $w > $colwidth[$i]; - $firstr = $r unless defined $firstr; } -} -my $totalwidth = 3*@dirs+$descrlen; -$totalwidth += $_ for (@colwidth); + for my $t (@subtests) { + my $firstr; + for my $i (0..$#dirs) { + my $d = $dirs[$i]; + my $base = "$resultsdir/$prefixes{$d}$t"; + $times{$prefixes{$d}.$t} = []; + foreach my $type (qw(times size)) { + if (-e "$base.$type") { + $times{$prefixes{$d}.$t} = [get_times("$base.$type")]; + last; + } + } + my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}}; + my $w = length format_times($r,$u,$s,$firstr); + $colwidth[$i] = $w if $w > $colwidth[$i]; + $firstr = $r unless defined $firstr; + } + } + my $totalwidth = 3*@dirs+$descrlen; + $totalwidth += $_ for (@colwidth); -printf "%-${descrlen}s", "Test"; -for my $i (0..$#dirs) { - my $d = $dirs[$i]; - printf " %-$colwidth[$i]s", (exists $dirabbrevs{$d} ? $dirabbrevs{$d} : $dirnames{$d}); -} -print "\n"; -print "-"x$totalwidth, "\n"; -for my $t (@subtests) { - printf "%-${descrlen}s", $descrs{$t}; - my $firstr; + printf "%-${descrlen}s", "Test"; for my $i (0..$#dirs) { - my $d = $dirs[$i]; - my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}}; - printf " %-$colwidth[$i]s", format_times($r,$u,$s,$firstr); - $firstr = $r unless defined $firstr; + printf " %-$colwidth[$i]s", display_dir($dirs[$i]); } print "\n"; + print "-"x$totalwidth, "\n"; + for my $t (@subtests) { + printf "%-${descrlen}s", $descrs{$t}; + my $firstr; + for my $i (0..$#dirs) { + my $d = $dirs[$i]; + my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}}; + printf " %-$colwidth[$i]s", format_times($r,$u,$s,$firstr); + $firstr = $r unless defined $firstr; + } + print "\n"; + } +} + +sub print_sorted_results { + my ($sortby) = @_; + + if ($sortby ne "regression") { + print "Only 'regression' is supported as '--sort-by' argument\n"; + usage(); + } + + my @evolutions; + for my $t (@subtests) { + my ($prevr, $prevu, $prevs, $prevrev); + for my $i (0..$#dirs) { + my $d = $dirs[$i]; + my ($r, $u, $s) = get_times("$resultsdir/$prefixes{$d}$t.times"); + if ($i > 0 and defined $r and defined $prevr and $prevr > 0) { + my $percent = 100.0 * ($r - $prevr) / $prevr; + push @evolutions, { "percent" => $percent, + "test" => $t, + "prevrev" => $prevrev, + "rev" => $d, + "prevr" => $prevr, + "r" => $r, + "prevu" => $prevu, + "u" => $u, + "prevs" => $prevs, + "s" => $s}; + } + ($prevr, $prevu, $prevs, $prevrev) = ($r, $u, $s, $d); + } + } + + my @sorted_evolutions = sort { $b->{percent} <=> $a->{percent} } @evolutions; + + for my $e (@sorted_evolutions) { + printf "%+.1f%%", $e->{percent}; + print " " . $e->{test}; + print " " . format_times($e->{prevr}, $e->{prevu}, $e->{prevs}); + print " " . format_times($e->{r}, $e->{u}, $e->{s}); + print " " . display_dir($e->{prevrev}); + print " " . display_dir($e->{rev}); + print "\n"; + } +} + +sub print_codespeed_results { + my ($subsection) = @_; + + my $project = "Git"; + + my $executable = `uname -s -m`; + chomp $executable; + + if ($subsection) { + $executable .= ", " . $subsection; + } + + my $environment; + if ($reponame) { + $environment = $reponame; + } elsif (exists $ENV{GIT_PERF_REPO_NAME} and $ENV{GIT_PERF_REPO_NAME} ne "") { + $environment = $ENV{GIT_PERF_REPO_NAME}; + } elsif (exists $ENV{GIT_TEST_INSTALLED} and $ENV{GIT_TEST_INSTALLED} ne "") { + $environment = $ENV{GIT_TEST_INSTALLED}; + $environment =~ s|/bin-wrappers$||; + } else { + $environment = `uname -r`; + chomp $environment; + } + + my @data; + + for my $t (@subtests) { + for my $d (@dirs) { + my $commitid = $prefixes{$d}; + $commitid =~ s/^build_//; + $commitid =~ s/\.$//; + my ($result_value, $u, $s) = get_times("$resultsdir/$prefixes{$d}$t.times"); + + my %vals = ( + "commitid" => $commitid, + "project" => $project, + "branch" => $dirnames{$d}, + "executable" => $executable, + "benchmark" => $shorttests{$t} . " " . read_descr("$resultsdir/$t.descr"), + "environment" => $environment, + "result_value" => $result_value, + ); + push @data, \%vals; + } + } + + print to_json(\@data, {utf8 => 1, pretty => 1, canonical => 1}), "\n"; +} + +binmode STDOUT, ":utf8" or die "PANIC on binmode: $!"; + +if ($codespeed) { + print_codespeed_results($subsection); +} elsif (defined $sortby) { + print_sorted_results($sortby); +} else { + print_default_results(); } diff --git a/t/perf/bisect_regression b/t/perf/bisect_regression new file mode 100755 index 0000000000..a94d9955d0 --- /dev/null +++ b/t/perf/bisect_regression @@ -0,0 +1,73 @@ +#!/bin/sh + +# Read a line coming from `./aggregate.perl --sort-by regression ...` +# and automatically bisect to find the commit responsible for the +# performance regression. +# +# Lines from `./aggregate.perl --sort-by regression ...` look like: +# +# +100.0% p7821-grep-engines-fixed.1 0.04(0.10+0.03) 0.08(0.11+0.08) v2.14.3 v2.15.1 +# +33.3% p7820-grep-engines.1 0.03(0.08+0.02) 0.04(0.08+0.02) v2.14.3 v2.15.1 +# + +die () { + echo >&2 "error: $*" + exit 1 +} + +while [ $# -gt 0 ]; do + arg="$1" + case "$arg" in + --help) + echo "usage: $0 [--config file] [--subsection subsection]" + exit 0 + ;; + --config) + shift + GIT_PERF_CONFIG_FILE=$(cd "$(dirname "$1")"; pwd)/$(basename "$1") + export GIT_PERF_CONFIG_FILE + shift ;; + --subsection) + shift + GIT_PERF_SUBSECTION="$1" + export GIT_PERF_SUBSECTION + shift ;; + --*) + die "unrecognised option: '$arg'" ;; + *) + die "unknown argument '$arg'" + ;; + esac +done + +read -r regression subtest oldtime newtime oldrev newrev + +test_script=$(echo "$subtest" | sed -e 's/\(.*\)\.[0-9]*$/\1.sh/') +test_number=$(echo "$subtest" | sed -e 's/.*\.\([0-9]*\)$/\1/') + +# oldtime and newtime are decimal number, not integers + +oldtime=$(echo "$oldtime" | sed -e 's/^\([0-9]\+\.[0-9]\+\).*$/\1/') +newtime=$(echo "$newtime" | sed -e 's/^\([0-9]\+\.[0-9]\+\).*$/\1/') + +test $(echo "$newtime" "$oldtime" | awk '{ print ($1 > $2) }') = 1 || + die "New time '$newtime' shoud be greater than old time '$oldtime'" + +tmpdir=$(mktemp -d -t bisect_regression_XXXXXX) || die "Failed to create temp directory" +echo "$oldtime" >"$tmpdir/oldtime" || die "Failed to write to '$tmpdir/oldtime'" +echo "$newtime" >"$tmpdir/newtime" || die "Failed to write to '$tmpdir/newtime'" + +# Bisecting must be performed from the top level directory (even with --no-checkout) +( + toplevel_dir=$(git rev-parse --show-toplevel) || die "Failed to find top level directory" + cd "$toplevel_dir" || die "Failed to cd into top level directory '$toplevel_dir'" + + git bisect start --no-checkout "$newrev" "$oldrev" || die "Failed to start bisecting" + + git bisect run t/perf/bisect_run_script "$test_script" "$test_number" "$tmpdir" + res="$?" + + git bisect reset + + exit "$res" +) diff --git a/t/perf/bisect_run_script b/t/perf/bisect_run_script new file mode 100755 index 0000000000..3ebaf15521 --- /dev/null +++ b/t/perf/bisect_run_script @@ -0,0 +1,53 @@ +#!/bin/sh + +script="$1" +test_number="$2" +info_dir="$3" + +# This aborts the bisection immediately +die () { + echo >&2 "error: $*" + exit 255 +} + +bisect_head=$(git rev-parse --verify BISECT_HEAD) || die "Failed to find BISECT_HEAD ref" + +script_number=$(echo "$script" | sed -e "s/^p\([0-9]*\).*\$/\1/") || die "Failed to get script number for '$script'" + +oldtime=$(cat "$info_dir/oldtime") || die "Failed to access '$info_dir/oldtime'" +newtime=$(cat "$info_dir/newtime") || die "Failed to access '$info_dir/newtime'" + +cd t/perf || die "Failed to cd into 't/perf'" + +result_file="$info_dir/perf_${script_number}_${bisect_head}_results.txt" + +GIT_PERF_DIRS_OR_REVS="$bisect_head" +export GIT_PERF_DIRS_OR_REVS + +# Don't use codespeed +GIT_PERF_CODESPEED_OUTPUT= +GIT_PERF_SEND_TO_CODESPEED= +export GIT_PERF_CODESPEED_OUTPUT +export GIT_PERF_SEND_TO_CODESPEED + +./run "$script" >"$result_file" 2>&1 || die "Failed to run perf test '$script'" + +rtime=$(sed -n "s/^$script_number\.$test_number:.*\([0-9]\+\.[0-9]\+\)(.*).*\$/\1/p" "$result_file") + +echo "newtime: $newtime" +echo "rtime: $rtime" +echo "oldtime: $oldtime" + +# Compare ($newtime - $rtime) with ($rtime - $oldtime) +# Times are decimal number, not integers + +if test $(echo "$newtime" "$rtime" "$oldtime" | awk '{ print ($1 - $2 > $2 - $3) }') = 1 +then + # Current commit is considered "good/old" + echo "$rtime" >"$info_dir/oldtime" + exit 0 +else + # Current commit is considered "bad/new" + echo "$rtime" >"$info_dir/newtime" + exit 1 +fi diff --git a/t/perf/lib-pack.sh b/t/perf/lib-pack.sh new file mode 100644 index 0000000000..d3865db286 --- /dev/null +++ b/t/perf/lib-pack.sh @@ -0,0 +1,25 @@ +# Helpers for dealing with large numbers of packs. + +# create $1 nonsense packs, each with a single blob +create_packs () { + perl -le ' + my ($n) = @ARGV; + for (1..$n) { + print "blob"; + print "data <<EOF"; + print "$_"; + print "EOF"; + print "checkpoint" + } + ' "$@" | + git fast-import +} + +# create a large number of packs, disabling any gc which might +# cause us to repack them +setup_many_packs () { + git config gc.auto 0 && + git config gc.autopacklimit 0 && + git config fastimport.unpacklimit 0 && + create_packs 500 +} diff --git a/t/perf/p0000-perf-lib-sanity.sh b/t/perf/p0000-perf-lib-sanity.sh index cf8e1efce7..002c21e52a 100755 --- a/t/perf/p0000-perf-lib-sanity.sh +++ b/t/perf/p0000-perf-lib-sanity.sh @@ -33,6 +33,8 @@ test_perf 'export a weird var' ' test_export bar ' +test_perf 'éḿíẗ ńöń-ÁŚĆÍÍ ćḧáŕáćẗéŕś' 'true' + test_expect_success 'test_export works with weird vars' ' echo "$bar" && test "$bar" = "weird # variable" diff --git a/t/perf/p0002-read-cache.sh b/t/perf/p0002-read-cache.sh index 9180ae9343..cdd105a594 100755 --- a/t/perf/p0002-read-cache.sh +++ b/t/perf/p0002-read-cache.sh @@ -8,7 +8,7 @@ test_perf_default_repo count=1000 test_perf "read_cache/discard_cache $count times" " - test-read-cache $count + test-tool read-cache $count " test_done diff --git a/t/perf/p0004-lazy-init-name-hash.sh b/t/perf/p0004-lazy-init-name-hash.sh new file mode 100755 index 0000000000..1afc08fe7f --- /dev/null +++ b/t/perf/p0004-lazy-init-name-hash.sh @@ -0,0 +1,56 @@ +#!/bin/sh + +test_description='Tests multi-threaded lazy_init_name_hash' +. ./perf-lib.sh + +test_perf_large_repo +test_checkout_worktree + +test_expect_success 'verify both methods build the same hashmaps' ' + test-tool lazy-init-name-hash --dump --single >out.single && + if test-tool lazy-init-name-hash --dump --multi >out.multi + then + test_set_prereq REPO_BIG_ENOUGH_FOR_MULTI && + sort <out.single >sorted.single && + sort <out.multi >sorted.multi && + test_cmp sorted.single sorted.multi + fi +' + +test_expect_success 'calibrate' ' + entries=$(wc -l <out.single) && + + case $entries in + ?) count=1000000 ;; + ??) count=100000 ;; + ???) count=10000 ;; + ????) count=1000 ;; + ?????) count=100 ;; + ??????) count=10 ;; + *) count=1 ;; + esac && + export count && + + case $entries in + 1) entries_desc="1 entry" ;; + *) entries_desc="$entries entries" ;; + esac && + + case $count in + 1) count_desc="1 round" ;; + *) count_desc="$count rounds" ;; + esac && + + desc="$entries_desc, $count_desc" && + export desc +' + +test_perf "single-threaded, $desc" " + test-tool lazy-init-name-hash --single --count=$count +" + +test_perf REPO_BIG_ENOUGH_FOR_MULTI "multi-threaded, $desc" " + test-tool lazy-init-name-hash --multi --count=$count +" + +test_done diff --git a/t/perf/p0005-status.sh b/t/perf/p0005-status.sh new file mode 100755 index 0000000000..0b0aa9858f --- /dev/null +++ b/t/perf/p0005-status.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# +# This test measures the performance of various read-tree +# and status operations. It is primarily interested in +# the algorithmic costs of index operations and recursive +# tree traversal -- and NOT disk I/O on thousands of files. + +test_description="Tests performance of read-tree" + +. ./perf-lib.sh + +test_perf_default_repo + +# If the test repo was generated by ./repos/many-files.sh +# then we know something about the data shape and branches, +# so we can isolate testing to the ballast-related commits +# and setup sparse-checkout so we don't have to populate +# the ballast files and directories. +# +# Otherwise, we make some general assumptions about the +# repo and consider the entire history of the current +# branch to be the ballast. + +test_expect_success "setup repo" ' + if git rev-parse --verify refs/heads/p0006-ballast^{commit} + then + echo Assuming synthetic repo from many-files.sh + git branch br_base master + git branch br_ballast p0006-ballast + git config --local core.sparsecheckout 1 + cat >.git/info/sparse-checkout <<-EOF + /* + !ballast/* + EOF + else + echo Assuming non-synthetic repo... + git branch br_base $(git rev-list HEAD | tail -n 1) + git branch br_ballast HEAD + fi && + git checkout -q br_ballast && + nr_files=$(git ls-files | wc -l) +' + +test_perf "read-tree status br_ballast ($nr_files)" ' + git read-tree HEAD && + git status +' + +test_done diff --git a/t/perf/p0006-read-tree-checkout.sh b/t/perf/p0006-read-tree-checkout.sh new file mode 100755 index 0000000000..78cc23fe2f --- /dev/null +++ b/t/perf/p0006-read-tree-checkout.sh @@ -0,0 +1,67 @@ +#!/bin/sh +# +# This test measures the performance of various read-tree +# and checkout operations. It is primarily interested in +# the algorithmic costs of index operations and recursive +# tree traversal -- and NOT disk I/O on thousands of files. + +test_description="Tests performance of read-tree" + +. ./perf-lib.sh + +test_perf_default_repo + +# If the test repo was generated by ./repos/many-files.sh +# then we know something about the data shape and branches, +# so we can isolate testing to the ballast-related commits +# and setup sparse-checkout so we don't have to populate +# the ballast files and directories. +# +# Otherwise, we make some general assumptions about the +# repo and consider the entire history of the current +# branch to be the ballast. + +test_expect_success "setup repo" ' + if git rev-parse --verify refs/heads/p0006-ballast^{commit} + then + echo Assuming synthetic repo from many-files.sh + git branch br_base master + git branch br_ballast p0006-ballast^ + git branch br_ballast_alias p0006-ballast^ + git branch br_ballast_plus_1 p0006-ballast + git config --local core.sparsecheckout 1 + cat >.git/info/sparse-checkout <<-EOF + /* + !ballast/* + EOF + else + echo Assuming non-synthetic repo... + git branch br_base $(git rev-list HEAD | tail -n 1) + git branch br_ballast HEAD^ || error "no ancestor commit from current head" + git branch br_ballast_alias HEAD^ + git branch br_ballast_plus_1 HEAD + fi && + git checkout -q br_ballast && + nr_files=$(git ls-files | wc -l) +' + +test_perf "read-tree br_base br_ballast ($nr_files)" ' + git read-tree -m br_base br_ballast -n +' + +test_perf "switch between br_base br_ballast ($nr_files)" ' + git checkout -q br_base && + git checkout -q br_ballast +' + +test_perf "switch between br_ballast br_ballast_plus_1 ($nr_files)" ' + git checkout -q br_ballast_plus_1 && + git checkout -q br_ballast +' + +test_perf "switch between aliases ($nr_files)" ' + git checkout -q br_ballast_alias && + git checkout -q br_ballast +' + +test_done diff --git a/t/perf/p0007-write-cache.sh b/t/perf/p0007-write-cache.sh new file mode 100755 index 0000000000..09595264f0 --- /dev/null +++ b/t/perf/p0007-write-cache.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +test_description="Tests performance of writing the index" + +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success "setup repo" ' + if git rev-parse --verify refs/heads/p0006-ballast^{commit} + then + echo Assuming synthetic repo from many-files.sh + git config --local core.sparsecheckout 1 + cat >.git/info/sparse-checkout <<-EOF + /* + !ballast/* + EOF + else + echo Assuming non-synthetic repo... + fi && + nr_files=$(git ls-files | wc -l) +' + +count=3 +test_perf "write_locked_index $count times ($nr_files files)" " + test-tool write-cache $count +" + +test_done diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh index 7c9a35a646..6e924f5fa3 100755 --- a/t/perf/p0071-sort.sh +++ b/t/perf/p0071-sort.sh @@ -16,7 +16,7 @@ test_perf 'sort(1)' ' ' test_perf 'string_list_sort()' ' - test-string-list sort <unsorted >actual + test-tool string-list sort <unsorted >actual ' test_expect_success 'string_list_sort() sorts like sort(1)' ' diff --git a/t/perf/p0100-globbing.sh b/t/perf/p0100-globbing.sh new file mode 100755 index 0000000000..dd18a9ce2b --- /dev/null +++ b/t/perf/p0100-globbing.sh @@ -0,0 +1,43 @@ +#!/bin/sh + +test_description="Tests pathological globbing performance + +Shows how Git's globbing performance performs when given the sort of +pathological patterns described in at https://research.swtch.com/glob +" + +. ./perf-lib.sh + +test_globs_big='10 25 50 75 100' +test_globs_small='1 2 3 4 5 6' + +test_perf_fresh_repo + +test_expect_success 'setup' ' + for i in $(test_seq 1 100) + do + printf "a" >>refname && + for j in $(test_seq 1 $i) + do + printf "a*" >>refglob.$i + done && + echo b >>refglob.$i + done && + test_commit test $(cat refname).t "" $(cat refname).t +' + +for i in $test_globs_small +do + test_perf "refglob((a*)^nb) against tag (a^100).t; n = $i" ' + git for-each-ref "refs/tags/$(cat refglob.'$i')b" + ' +done + +for i in $test_globs_small +do + test_perf "fileglob((a*)^nb) against file (a^100).t; n = $i" ' + git ls-files "$(cat refglob.'$i')b" + ' +done + +test_done diff --git a/t/perf/p1450-fsck.sh b/t/perf/p1450-fsck.sh new file mode 100755 index 0000000000..ae1b84198b --- /dev/null +++ b/t/perf/p1450-fsck.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +test_description='Test fsck performance' + +. ./perf-lib.sh + +test_perf_large_repo + +test_perf 'fsck' ' + git fsck +' + +test_done diff --git a/t/perf/p1451-fsck-skip-list.sh b/t/perf/p1451-fsck-skip-list.sh new file mode 100755 index 0000000000..c2b97d2487 --- /dev/null +++ b/t/perf/p1451-fsck-skip-list.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +test_description='Test fsck skipList performance' + +. ./perf-lib.sh + +test_perf_fresh_repo + +n=1000000 + +test_expect_success "setup $n bad commits" ' + for i in $(test_seq 1 $n) + do + echo "commit refs/heads/master" && + echo "committer C <c@example.com> 1234567890 +0000" && + echo "data <<EOF" && + echo "$i.Q." && + echo "EOF" + done | q_to_nul | git fast-import +' + +skip=0 +while test $skip -le $n +do + test_expect_success "create skipList for $skip bad commits" ' + git log --format=%H --max-count=$skip | + sort >skiplist + ' + + test_perf "fsck with $skip skipped bad commits" ' + git -c fsck.skipList=skiplist fsck + ' + + case $skip in + 0) skip=1 ;; + *) skip=${skip}0 ;; + esac +done + +test_done diff --git a/t/perf/p3400-rebase.sh b/t/perf/p3400-rebase.sh index b3e7d525d2..d202aaed06 100755 --- a/t/perf/p3400-rebase.sh +++ b/t/perf/p3400-rebase.sh @@ -5,10 +5,10 @@ test_description='Tests rebase performance' test_perf_default_repo -test_expect_success 'setup' ' - git checkout -f -b base && - git checkout -b to-rebase && - git checkout -b upstream && +test_expect_success 'setup rebasing on top of a lot of changes' ' + git checkout -f -B base && + git checkout -B to-rebase && + git checkout -B upstream && for i in $(seq 100) do # simulate huge diffs @@ -33,4 +33,24 @@ test_perf 'rebase on top of a lot of unrelated changes' ' git rebase --onto base HEAD^ ' +test_expect_success 'setup rebasing many changes without split-index' ' + git config core.splitIndex false && + git checkout -B upstream2 to-rebase && + git checkout -B to-rebase2 upstream +' + +test_perf 'rebase a lot of unrelated changes without split-index' ' + git rebase --onto upstream2 base && + git rebase --onto base upstream2 +' + +test_expect_success 'setup rebasing many changes with split-index' ' + git config core.splitIndex true +' + +test_perf 'rebase a lot of unrelated changes with split-index' ' + git rebase --onto upstream2 base && + git rebase --onto base upstream2 +' + test_done diff --git a/t/perf/p4205-log-pretty-formats.sh b/t/perf/p4205-log-pretty-formats.sh new file mode 100755 index 0000000000..7c26f4f337 --- /dev/null +++ b/t/perf/p4205-log-pretty-formats.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +test_description='Tests the performance of various pretty format placeholders' + +. ./perf-lib.sh + +test_perf_default_repo + +for format in %H %h %T %t %P %p %h-%h-%h +do + test_perf "log with $format" " + git log --format=\"$format\" >/dev/null + " +done + +test_done diff --git a/t/perf/p4211-line-log.sh b/t/perf/p4211-line-log.sh index b7ff68d4fa..392bcc0e51 100755 --- a/t/perf/p4211-line-log.sh +++ b/t/perf/p4211-line-log.sh @@ -31,4 +31,12 @@ test_perf 'git log -L (renames on)' ' git log -M -L 1:"$file" >/dev/null ' +test_perf 'git log --oneline --raw --parents' ' + git log --oneline --raw --parents >/dev/null +' + +test_perf 'git log --oneline --raw --parents -1000' ' + git log --oneline --raw --parents -1000 >/dev/null +' + test_done diff --git a/t/perf/p4220-log-grep-engines.sh b/t/perf/p4220-log-grep-engines.sh new file mode 100755 index 0000000000..2bc47ded4d --- /dev/null +++ b/t/perf/p4220-log-grep-engines.sh @@ -0,0 +1,53 @@ +#!/bin/sh + +test_description="Comparison of git-log's --grep regex engines + +Set GIT_PERF_4220_LOG_OPTS in the environment to pass options to +git-grep. Make sure to include a leading space, +e.g. GIT_PERF_4220_LOG_OPTS=' -i'. Some options to try: + + -i + --invert-grep + -i --invert-grep +" + +. ./perf-lib.sh + +test_perf_large_repo +test_checkout_worktree + +for pattern in \ + 'how.to' \ + '^how to' \ + '[how] to' \ + '\(e.t[^ ]*\|v.ry\) rare' \ + 'm\(ú\|u\)lt.b\(æ\|y\)te' +do + for engine in basic extended perl + do + if test $engine != "basic" + then + # Poor man's basic -> extended converter. + pattern=$(echo $pattern | sed 's/\\//g') + fi + if test $engine = "perl" && ! test_have_prereq PCRE + then + prereq="PCRE" + else + prereq="" + fi + test_perf $prereq "$engine log$GIT_PERF_4220_LOG_OPTS --grep='$pattern'" " + git -c grep.patternType=$engine log --pretty=format:%h$GIT_PERF_4220_LOG_OPTS --grep='$pattern' >'out.$engine' || : + " + done + + test_expect_success "assert that all engines found the same for$GIT_PERF_4220_LOG_OPTS '$pattern'" ' + test_cmp out.basic out.extended && + if test_have_prereq PCRE + then + test_cmp out.basic out.perl + fi + ' +done + +test_done diff --git a/t/perf/p4221-log-grep-engines-fixed.sh b/t/perf/p4221-log-grep-engines-fixed.sh new file mode 100755 index 0000000000..060971265a --- /dev/null +++ b/t/perf/p4221-log-grep-engines-fixed.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +test_description="Comparison of git-log's --grep regex engines with -F + +Set GIT_PERF_4221_LOG_OPTS in the environment to pass options to +git-grep. Make sure to include a leading space, +e.g. GIT_PERF_4221_LOG_OPTS=' -i'. Some options to try: + + -i + --invert-grep + -i --invert-grep +" + +. ./perf-lib.sh + +test_perf_large_repo +test_checkout_worktree + +for pattern in 'int' 'uncommon' 'æ' +do + for engine in fixed basic extended perl + do + if test $engine = "perl" && ! test_have_prereq PCRE + then + prereq="PCRE" + else + prereq="" + fi + test_perf $prereq "$engine log$GIT_PERF_4221_LOG_OPTS --grep='$pattern'" " + git -c grep.patternType=$engine log --pretty=format:%h$GIT_PERF_4221_LOG_OPTS --grep='$pattern' >'out.$engine' || : + " + done + + test_expect_success "assert that all engines found the same for$GIT_PERF_4221_LOG_OPTS '$pattern'" ' + test_cmp out.fixed out.basic && + test_cmp out.fixed out.extended && + if test_have_prereq PCRE + then + test_cmp out.fixed out.perl + fi + ' +done + +test_done diff --git a/t/perf/p5311-pack-bitmaps-fetch.sh b/t/perf/p5311-pack-bitmaps-fetch.sh new file mode 100755 index 0000000000..b04575951f --- /dev/null +++ b/t/perf/p5311-pack-bitmaps-fetch.sh @@ -0,0 +1,45 @@ +#!/bin/sh + +test_description='performance of fetches from bitmapped packs' +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success 'create bitmapped server repo' ' + git config pack.writebitmaps true && + git config pack.writebitmaphashcache true && + git repack -ad +' + +# simulate a fetch from a repository that last fetched N days ago, for +# various values of N. We do so by following the first-parent chain, +# and assume the first entry in the chain that is N days older than the current +# HEAD is where the HEAD would have been then. +for days in 1 2 4 8 16 32 64 128; do + title=$(printf '%10s' "($days days)") + test_expect_success "setup revs from $days days ago" ' + now=$(git log -1 --format=%ct HEAD) && + then=$(($now - ($days * 86400))) && + tip=$(git rev-list -1 --first-parent --until=$then HEAD) && + { + echo HEAD && + echo ^$tip + } >revs + ' + + test_perf "server $title" ' + git pack-objects --stdout --revs \ + --thin --delta-base-offset \ + <revs >tmp.pack + ' + + test_size "size $title" ' + wc -c <tmp.pack + ' + + test_perf "client $title" ' + git index-pack --stdin --fix-thin <tmp.pack + ' +done + +test_done diff --git a/t/perf/p5550-fetch-tags.sh b/t/perf/p5550-fetch-tags.sh index a5dc39f86a..d0e0e019ea 100755 --- a/t/perf/p5550-fetch-tags.sh +++ b/t/perf/p5550-fetch-tags.sh @@ -20,6 +20,7 @@ start to show a noticeable performance problem on my machine, but without taking too long to set up and run the tests. ' . ./perf-lib.sh +. "$TEST_DIRECTORY/perf/lib-pack.sh" # make a long nonsense history on branch $1, consisting of $2 commits, each # with a unique file pointing to the blob at $2. @@ -44,26 +45,6 @@ create_tags () { git update-ref --stdin } -# create $1 nonsense packs, each with a single blob -create_packs () { - perl -le ' - my ($n) = @ARGV; - for (1..$n) { - print "blob"; - print "data <<EOF"; - print "$_"; - print "EOF"; - } - ' "$@" | - git fast-import && - - git cat-file --batch-all-objects --batch-check='%(objectname)' | - while read sha1 - do - echo $sha1 | git pack-objects .git/objects/pack/pack - done -} - test_expect_success 'create parent and child' ' git init parent && git -C parent commit --allow-empty -m base && @@ -84,9 +65,7 @@ test_expect_success 'populate parent tags' ' test_expect_success 'create child packs' ' ( cd child && - git config gc.auto 0 && - git config gc.autopacklimit 0 && - create_packs 500 + setup_many_packs ) ' diff --git a/t/perf/p5551-fetch-rescan.sh b/t/perf/p5551-fetch-rescan.sh new file mode 100755 index 0000000000..b99dc23e32 --- /dev/null +++ b/t/perf/p5551-fetch-rescan.sh @@ -0,0 +1,55 @@ +#!/bin/sh + +test_description='fetch performance with many packs + +It is common for fetch to consider objects that we might not have, and it is an +easy mistake for the code to use a function like `parse_object` that might +give the correct _answer_ on such an object, but do so slowly (due to +re-scanning the pack directory for lookup failures). + +The resulting performance drop can be hard to notice in a real repository, but +becomes quite large in a repository with a large number of packs. So this +test creates a more pathological case, since any mistakes would produce a more +noticeable slowdown. +' +. ./perf-lib.sh +. "$TEST_DIRECTORY"/perf/lib-pack.sh + +test_expect_success 'create parent and child' ' + git init parent && + git clone parent child +' + + +test_expect_success 'create refs in the parent' ' + ( + cd parent && + git commit --allow-empty -m foo && + head=$(git rev-parse HEAD) && + test_seq 1000 | + sed "s,.*,update refs/heads/& $head," | + $MODERN_GIT update-ref --stdin + ) +' + +test_expect_success 'create many packs in the child' ' + ( + cd child && + setup_many_packs + ) +' + +test_perf 'fetch' ' + # start at the same state for each iteration + obj=$($MODERN_GIT -C parent rev-parse HEAD) && + ( + cd child && + $MODERN_GIT for-each-ref --format="delete %(refname)" refs/remotes | + $MODERN_GIT update-ref --stdin && + rm -vf .git/objects/$(echo $obj | sed "s|^..|&/|") && + + git fetch + ) +' + +test_done diff --git a/t/perf/p7519-fsmonitor.sh b/t/perf/p7519-fsmonitor.sh new file mode 100755 index 0000000000..def7ecdbc7 --- /dev/null +++ b/t/perf/p7519-fsmonitor.sh @@ -0,0 +1,183 @@ +#!/bin/sh + +test_description="Test core.fsmonitor" + +. ./perf-lib.sh + +# +# Performance test for the fsmonitor feature which enables git to talk to a +# file system change monitor and avoid having to scan the working directory +# for new or modified files. +# +# By default, the performance test will utilize the Watchman file system +# monitor if it is installed. If Watchman is not installed, it will use a +# dummy integration script that does not report any new or modified files. +# The dummy script has very little overhead which provides optimistic results. +# +# The performance test will also use the untracked cache feature if it is +# available as fsmonitor uses it to speed up scanning for untracked files. +# +# There are 3 environment variables that can be used to alter the default +# behavior of the performance test: +# +# GIT_PERF_7519_UNTRACKED_CACHE: used to configure core.untrackedCache +# GIT_PERF_7519_SPLIT_INDEX: used to configure core.splitIndex +# GIT_PERF_7519_FSMONITOR: used to configure core.fsMonitor +# +# The big win for using fsmonitor is the elimination of the need to scan the +# working directory looking for changed and untracked files. If the file +# information is all cached in RAM, the benefits are reduced. +# +# GIT_PERF_7519_DROP_CACHE: if set, the OS caches are dropped between tests +# + +test_perf_large_repo +test_checkout_worktree + +test_lazy_prereq UNTRACKED_CACHE ' + { git update-index --test-untracked-cache; ret=$?; } && + test $ret -ne 1 +' + +test_lazy_prereq WATCHMAN ' + command -v watchman +' + +if test_have_prereq WATCHMAN +then + # Convert unix style paths to escaped Windows style paths for Watchman + case "$(uname -s)" in + MSYS_NT*) + GIT_WORK_TREE="$(cygpath -aw "$PWD" | sed 's,\\,/,g')" + ;; + *) + GIT_WORK_TREE="$PWD" + ;; + esac +fi + +if test -n "$GIT_PERF_7519_DROP_CACHE" +then + # When using GIT_PERF_7519_DROP_CACHE, GIT_PERF_REPEAT_COUNT must be 1 to + # generate valid results. Otherwise the caching that happens for the nth + # run will negate the validity of the comparisons. + if test "$GIT_PERF_REPEAT_COUNT" -ne 1 + then + echo "warning: Setting GIT_PERF_REPEAT_COUNT=1" >&2 + GIT_PERF_REPEAT_COUNT=1 + fi +fi + +test_expect_success "setup for fsmonitor" ' + # set untrackedCache depending on the environment + if test -n "$GIT_PERF_7519_UNTRACKED_CACHE" + then + git config core.untrackedCache "$GIT_PERF_7519_UNTRACKED_CACHE" + else + if test_have_prereq UNTRACKED_CACHE + then + git config core.untrackedCache true + else + git config core.untrackedCache false + fi + fi && + + # set core.splitindex depending on the environment + if test -n "$GIT_PERF_7519_SPLIT_INDEX" + then + git config core.splitIndex "$GIT_PERF_7519_SPLIT_INDEX" + fi && + + # set INTEGRATION_SCRIPT depending on the environment + if test -n "$GIT_PERF_7519_FSMONITOR" + then + INTEGRATION_SCRIPT="$GIT_PERF_7519_FSMONITOR" + else + # + # Choose integration script based on existence of Watchman. + # If Watchman exists, watch the work tree and attempt a query. + # If everything succeeds, use Watchman integration script, + # else fall back to an empty integration script. + # + mkdir .git/hooks && + if test_have_prereq WATCHMAN + then + INTEGRATION_SCRIPT=".git/hooks/fsmonitor-watchman" && + cp "$TEST_DIRECTORY/../templates/hooks--fsmonitor-watchman.sample" "$INTEGRATION_SCRIPT" && + watchman watch "$GIT_WORK_TREE" && + watchman watch-list | grep -q -F "$GIT_WORK_TREE" + else + INTEGRATION_SCRIPT=".git/hooks/fsmonitor-empty" && + write_script "$INTEGRATION_SCRIPT"<<-\EOF + EOF + fi + fi && + + git config core.fsmonitor "$INTEGRATION_SCRIPT" && + git update-index --fsmonitor +' + +if test -n "$GIT_PERF_7519_DROP_CACHE"; then + test-tool drop-caches +fi + +test_perf "status (fsmonitor=$INTEGRATION_SCRIPT)" ' + git status +' + +if test -n "$GIT_PERF_7519_DROP_CACHE"; then + test-tool drop-caches +fi + +test_perf "status -uno (fsmonitor=$INTEGRATION_SCRIPT)" ' + git status -uno +' + +if test -n "$GIT_PERF_7519_DROP_CACHE"; then + test-tool drop-caches +fi + +test_perf "status -uall (fsmonitor=$INTEGRATION_SCRIPT)" ' + git status -uall +' + +test_expect_success "setup without fsmonitor" ' + unset INTEGRATION_SCRIPT && + git config --unset core.fsmonitor && + git update-index --no-fsmonitor +' + +if test -n "$GIT_PERF_7519_DROP_CACHE"; then + test-tool drop-caches +fi + +test_perf "status (fsmonitor=$INTEGRATION_SCRIPT)" ' + git status +' + +if test -n "$GIT_PERF_7519_DROP_CACHE"; then + test-tool drop-caches +fi + +test_perf "status -uno (fsmonitor=$INTEGRATION_SCRIPT)" ' + git status -uno +' + +if test -n "$GIT_PERF_7519_DROP_CACHE"; then + test-tool drop-caches +fi + +test_perf "status -uall (fsmonitor=$INTEGRATION_SCRIPT)" ' + git status -uall +' + +if test_have_prereq WATCHMAN +then + watchman watch-del "$GIT_WORK_TREE" >/dev/null 2>&1 && + + # Work around Watchman bug on Windows where it holds on to handles + # preventing the removal of the trash directory + watchman shutdown-server >/dev/null 2>&1 +fi + +test_done diff --git a/t/perf/p7820-grep-engines.sh b/t/perf/p7820-grep-engines.sh new file mode 100755 index 0000000000..8b09c5bf32 --- /dev/null +++ b/t/perf/p7820-grep-engines.sh @@ -0,0 +1,88 @@ +#!/bin/sh + +test_description="Comparison of git-grep's regex engines + +Set GIT_PERF_7820_GREP_OPTS in the environment to pass options to +git-grep. Make sure to include a leading space, +e.g. GIT_PERF_7820_GREP_OPTS=' -i'. Some options to try: + + -i + -w + -v + -vi + -vw + -viw + +If GIT_PERF_GREP_THREADS is set to a list of threads (e.g. '1 4 8' +etc.) we will test the patterns under those numbers of threads. +" + +. ./perf-lib.sh + +test_perf_large_repo +test_checkout_worktree + +if test -n "$GIT_PERF_GREP_THREADS" +then + test_set_prereq PERF_GREP_ENGINES_THREADS +fi + +for pattern in \ + 'how.to' \ + '^how to' \ + '[how] to' \ + '\(e.t[^ ]*\|v.ry\) rare' \ + 'm\(ú\|u\)lt.b\(æ\|y\)te' +do + for engine in basic extended perl + do + if test $engine != "basic" + then + # Poor man's basic -> extended converter. + pattern=$(echo "$pattern" | sed 's/\\//g') + fi + if test $engine = "perl" && ! test_have_prereq PCRE + then + prereq="PCRE" + else + prereq="" + fi + if ! test_have_prereq PERF_GREP_ENGINES_THREADS + then + test_perf $prereq "$engine grep$GIT_PERF_7820_GREP_OPTS '$pattern'" " + git -c grep.patternType=$engine grep$GIT_PERF_7820_GREP_OPTS -- '$pattern' >'out.$engine' || : + " + else + for threads in $GIT_PERF_GREP_THREADS + do + test_perf PTHREADS,$prereq "$engine grep$GIT_PERF_7820_GREP_OPTS '$pattern' with $threads threads" " + git -c grep.patternType=$engine -c grep.threads=$threads grep$GIT_PERF_7820_GREP_OPTS -- '$pattern' >'out.$engine.$threads' || : + " + done + fi + done + + if ! test_have_prereq PERF_GREP_ENGINES_THREADS + then + test_expect_success "assert that all engines found the same for$GIT_PERF_7820_GREP_OPTS '$pattern'" ' + test_cmp out.basic out.extended && + if test_have_prereq PCRE + then + test_cmp out.basic out.perl + fi + ' + else + for threads in $GIT_PERF_GREP_THREADS + do + test_expect_success PTHREADS "assert that all engines found the same for$GIT_PERF_7820_GREP_OPTS '$pattern' under threading" " + test_cmp out.basic.$threads out.extended.$threads && + if test_have_prereq PCRE + then + test_cmp out.basic.$threads out.perl.$threads + fi + " + done + fi +done + +test_done diff --git a/t/perf/p7821-grep-engines-fixed.sh b/t/perf/p7821-grep-engines-fixed.sh new file mode 100755 index 0000000000..61e41b82cf --- /dev/null +++ b/t/perf/p7821-grep-engines-fixed.sh @@ -0,0 +1,74 @@ +#!/bin/sh + +test_description="Comparison of git-grep's regex engines with -F + +Set GIT_PERF_7821_GREP_OPTS in the environment to pass options to +git-grep. Make sure to include a leading space, +e.g. GIT_PERF_7821_GREP_OPTS=' -w'. See p7820-grep-engines.sh for more +options to try. + +If GIT_PERF_7821_THREADS is set to a list of threads (e.g. '1 4 8' +etc.) we will test the patterns under those numbers of threads. +" + +. ./perf-lib.sh + +test_perf_large_repo +test_checkout_worktree + +if test -n "$GIT_PERF_GREP_THREADS" +then + test_set_prereq PERF_GREP_ENGINES_THREADS +fi + +for pattern in 'int' 'uncommon' 'æ' +do + for engine in fixed basic extended perl + do + if test $engine = "perl" && ! test_have_prereq PCRE + then + prereq="PCRE" + else + prereq="" + fi + if ! test_have_prereq PERF_GREP_ENGINES_THREADS + then + test_perf $prereq "$engine grep$GIT_PERF_7821_GREP_OPTS $pattern" " + git -c grep.patternType=$engine grep$GIT_PERF_7821_GREP_OPTS $pattern >'out.$engine' || : + " + else + for threads in $GIT_PERF_GREP_THREADS + do + test_perf PTHREADS,$prereq "$engine grep$GIT_PERF_7821_GREP_OPTS $pattern with $threads threads" " + git -c grep.patternType=$engine -c grep.threads=$threads grep$GIT_PERF_7821_GREP_OPTS $pattern >'out.$engine.$threads' || : + " + done + fi + done + + if ! test_have_prereq PERF_GREP_ENGINES_THREADS + then + test_expect_success "assert that all engines found the same for$GIT_PERF_7821_GREP_OPTS $pattern" ' + test_cmp out.fixed out.basic && + test_cmp out.fixed out.extended && + if test_have_prereq PCRE + then + test_cmp out.fixed out.perl + fi + ' + else + for threads in $GIT_PERF_GREP_THREADS + do + test_expect_success PTHREADS "assert that all engines found the same for$GIT_PERF_7821_GREP_OPTS $pattern under threading" " + test_cmp out.fixed.$threads out.basic.$threads && + test_cmp out.fixed.$threads out.extended.$threads && + if test_have_prereq PCRE + then + test_cmp out.fixed.$threads out.perl.$threads + fi + " + done + fi +done + +test_done diff --git a/t/perf/perf-lib.sh b/t/perf/perf-lib.sh index ab4b8b06ae..2e33ab3ec3 100644 --- a/t/perf/perf-lib.sh +++ b/t/perf/perf-lib.sh @@ -56,12 +56,10 @@ MODERN_GIT=$GIT_BUILD_DIR/bin-wrappers/git export MODERN_GIT perf_results_dir=$TEST_OUTPUT_DIRECTORY/test-results +test -n "$GIT_PERF_SUBSECTION" && perf_results_dir="$perf_results_dir/$GIT_PERF_SUBSECTION" mkdir -p "$perf_results_dir" rm -f "$perf_results_dir"/$(basename "$0" .sh).subtests -if test -z "$GIT_PERF_REPEAT_COUNT"; then - GIT_PERF_REPEAT_COUNT=3 -fi die_if_build_dir_not_repo () { if ! ( cd "$TEST_DIRECTORY/.." && git rev-parse --build-dir >/dev/null 2>&1 ); then @@ -78,9 +76,13 @@ if test -z "$GIT_PERF_LARGE_REPO"; then GIT_PERF_LARGE_REPO=$TEST_DIRECTORY/.. fi +test_perf_do_repo_symlink_config_ () { + test_have_prereq SYMLINKS || git config core.symlinks false +} + test_perf_create_repo_from () { test "$#" = 2 || - error "bug in the test script: not 2 parameters to test-create-repo" + BUG "not 2 parameters to test-create-repo" repo="$1" source="$2" source_git="$("$MODERN_GIT" -C "$source" rev-parse --git-dir)" @@ -102,15 +104,29 @@ test_perf_create_repo_from () { ) && ( cd "$repo" && - "$MODERN_GIT" init -q && { - test_have_prereq SYMLINKS || - git config core.symlinks false - } && - mv .git/hooks .git/hooks-disabled 2>/dev/null + "$MODERN_GIT" init -q && + test_perf_do_repo_symlink_config_ && + mv .git/hooks .git/hooks-disabled 2>/dev/null && + if test -f .git/index.lock + then + # We may be copying a repo that can't run "git + # status" due to a locked index. Since we have + # a copy it's fine to remove the lock. + rm .git/index.lock + fi ) || error "failed to copy repository '$source' to '$repo'" } # call at least one of these to establish an appropriately-sized repository +test_perf_fresh_repo () { + repo="${1:-$TRASH_DIRECTORY}" + "$MODERN_GIT" init -q "$repo" && + ( + cd "$repo" && + test_perf_do_repo_symlink_config_ + ) +} + test_perf_default_repo () { test_perf_create_repo_from "${1:-$TRASH_DIRECTORY}" "$GIT_PERF_REPO" } @@ -163,47 +179,69 @@ exit $ret' >&3 2>&4 return "$eval_ret" } - -test_perf () { +test_wrapper_ () { + test_wrapper_func_=$1; shift test_start_ test "$#" = 3 && { test_prereq=$1; shift; } || test_prereq= test "$#" = 2 || - error "bug in the test script: not 2 or 3 parameters to test-expect-success" + BUG "not 2 or 3 parameters to test-expect-success" export test_prereq if ! test_skip "$@" then base=$(basename "$0" .sh) echo "$test_count" >>"$perf_results_dir"/$base.subtests echo "$1" >"$perf_results_dir"/$base.$test_count.descr - if test -z "$verbose"; then - printf "%s" "perf $test_count - $1:" - else - echo "perf $test_count - $1:" - fi - for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do - say >&3 "running: $2" - if test_run_perf_ "$2" - then - if test -z "$verbose"; then - printf " %s" "$i" - else - echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:" - fi + base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count" + "$test_wrapper_func_" "$@" + fi + + test_finish_ +} + +test_perf_ () { + if test -z "$verbose"; then + printf "%s" "perf $test_count - $1:" + else + echo "perf $test_count - $1:" + fi + for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do + say >&3 "running: $2" + if test_run_perf_ "$2" + then + if test -z "$verbose"; then + printf " %s" "$i" else - test -z "$verbose" && echo - test_failure_ "$@" - break + echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:" fi - done - if test -z "$verbose"; then - echo " ok" else - test_ok_ "$1" + test -z "$verbose" && echo + test_failure_ "$@" + break fi - base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count" - "$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times + done + if test -z "$verbose"; then + echo " ok" + else + test_ok_ "$1" fi - test_finish_ + "$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times +} + +test_perf () { + test_wrapper_ test_perf_ "$@" +} + +test_size_ () { + say >&3 "running: $2" + if test_eval_ "$2" 3>"$base".size; then + test_ok_ "$1" + else + test_failure_ "$@" + fi +} + +test_size () { + test_wrapper_ test_size_ "$@" } # We extend test_done to print timings at the end (./run disables this diff --git a/t/perf/repos/.gitignore b/t/perf/repos/.gitignore new file mode 100644 index 0000000000..72e3dc3e19 --- /dev/null +++ b/t/perf/repos/.gitignore @@ -0,0 +1 @@ +gen-*/ diff --git a/t/perf/repos/inflate-repo.sh b/t/perf/repos/inflate-repo.sh new file mode 100755 index 0000000000..fcfc992b5b --- /dev/null +++ b/t/perf/repos/inflate-repo.sh @@ -0,0 +1,85 @@ +#!/bin/sh +# Inflate the size of an EXISTING repo. +# +# This script should be run inside the worktree of a TEST repo. +# It will use the contents of the current HEAD to generate a +# commit containing copies of the current worktree such that the +# total size of the commit has at least <target_size> files. +# +# Usage: [-t target_size] [-b branch_name] + +set -e + +target_size=10000 +branch_name=p0006-ballast +ballast=ballast + +while test "$#" -ne 0 +do + case "$1" in + -b) + shift; + test "$#" -ne 0 || { echo 'error: -b requires an argument' >&2; exit 1; } + branch_name=$1; + shift ;; + -t) + shift; + test "$#" -ne 0 || { echo 'error: -t requires an argument' >&2; exit 1; } + target_size=$1; + shift ;; + *) + echo "error: unknown option '$1'" >&2; exit 1 ;; + esac +done + +git ls-tree -r HEAD >GEN_src_list +nr_src_files=$(cat GEN_src_list | wc -l) + +src_branch=$(git symbolic-ref --short HEAD) + +echo "Branch $src_branch initially has $nr_src_files files." + +if test $target_size -le $nr_src_files +then + echo "Repository already exceeds target size $target_size." + rm GEN_src_list + exit 1 +fi + +# Create well-known branch and add 1 file change to start +# if off before the ballast. +git checkout -b $branch_name HEAD +echo "$target_size" > inflate-repo.params +git add inflate-repo.params +git commit -q -m params + +# Create ballast for in our branch. +copy=1 +nr_files=$nr_src_files +while test $nr_files -lt $target_size +do + sed -e "s| | $ballast/$copy/|" <GEN_src_list | + git update-index --index-info + + nr_files=$(expr $nr_files + $nr_src_files) + copy=$(expr $copy + 1) +done +rm GEN_src_list +git commit -q -m "ballast" + +# Modify 1 file and commit. +echo "$target_size" >> inflate-repo.params +git add inflate-repo.params +git commit -q -m "ballast plus 1" + +nr_files=$(git ls-files | wc -l) + +# Checkout master to put repo in canonical state (because +# the perf test may need to clone and enable sparse-checkout +# before attempting to checkout a commit with the ballast +# (because it may contain 100K directories and 1M files)). +git checkout $src_branch + +echo "Repository inflated. Branch $branch_name has $nr_files files." + +exit 0 diff --git a/t/perf/repos/many-files.sh b/t/perf/repos/many-files.sh new file mode 100755 index 0000000000..28720e4e10 --- /dev/null +++ b/t/perf/repos/many-files.sh @@ -0,0 +1,110 @@ +#!/bin/sh +# Generate test data repository using the given parameters. +# When omitted, we create "gen-many-files-d-w-f.git". +# +# Usage: [-r repo] [-d depth] [-w width] [-f files] +# +# -r repo: path to the new repo to be generated +# -d depth: the depth of sub-directories +# -w width: the number of sub-directories at each level +# -f files: the number of files created in each directory +# +# Note that all files will have the same SHA-1 and each +# directory at a level will have the same SHA-1, so we +# will potentially have a large index, but not a large +# ODB. +# +# Ballast will be created under "ballast/". + +EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 + +set -e + +# (5, 10, 9) will create 999,999 ballast files. +# (4, 10, 9) will create 99,999 ballast files. +depth=5 +width=10 +files=9 + +while test "$#" -ne 0 +do + case "$1" in + -r) + shift; + test "$#" -ne 0 || { echo 'error: -r requires an argument' >&2; exit 1; } + repo=$1; + shift ;; + -d) + shift; + test "$#" -ne 0 || { echo 'error: -d requires an argument' >&2; exit 1; } + depth=$1; + shift ;; + -w) + shift; + test "$#" -ne 0 || { echo 'error: -w requires an argument' >&2; exit 1; } + width=$1; + shift ;; + -f) + shift; + test "$#" -ne 0 || { echo 'error: -f requires an argument' >&2; exit 1; } + files=$1; + shift ;; + *) + echo "error: unknown option '$1'" >&2; exit 1 ;; + esac +done + +# Inflate the index with thousands of empty files. +# usage: dir depth width files +fill_index() { + awk -v arg_dir=$1 -v arg_depth=$2 -v arg_width=$3 -v arg_files=$4 ' + function make_paths(dir, depth, width, files, f, w) { + for (f = 1; f <= files; f++) { + print dir "/file" f + } + if (depth > 0) { + for (w = 1; w <= width; w++) { + make_paths(dir "/dir" w, depth - 1, width, files) + } + } + } + END { make_paths(arg_dir, arg_depth, arg_width, arg_files) } + ' </dev/null | + sed "s/^/100644 $EMPTY_BLOB /" | + git update-index --index-info + return 0 +} + +[ -z "$repo" ] && repo=gen-many-files-$depth.$width.$files.git + +mkdir $repo +cd $repo +git init . + +# Create an initial commit just to define master. +touch many-files.empty +echo "$depth $width $files" >many-files.params +git add many-files.* +git commit -q -m params + +# Create ballast for p0006 based upon the given params and +# inflate the index with thousands of empty files and commit. +git checkout -b p0006-ballast +fill_index "ballast" $depth $width $files +git commit -q -m "ballast" + +nr_files=$(git ls-files | wc -l) + +# Modify 1 file and commit. +echo "$depth $width $files" >>many-files.params +git add many-files.params +git commit -q -m "ballast plus 1" + +# Checkout master to put repo in canonical state (because +# the perf test may need to clone and enable sparse-checkout +# before attempting to checkout a commit with the ballast +# (because it may contain 100K directories and 1M files)). +git checkout master + +echo "Repository "$repo" ($depth, $width, $files) created. Ballast $nr_files." +exit 0 diff --git a/t/perf/run b/t/perf/run index c788d713ae..9aaa733c77 100755 --- a/t/perf/run +++ b/t/perf/run @@ -1,17 +1,35 @@ #!/bin/sh -case "$1" in - --help) - echo "usage: $0 [other_git_tree...] [--] [test_scripts]" - exit 0 - ;; -esac - die () { echo >&2 "error: $*" exit 1 } +while [ $# -gt 0 ]; do + arg="$1" + case "$arg" in + --) + break ;; + --help) + echo "usage: $0 [--config file] [--subsection subsec] [other_git_tree...] [--] [test_scripts]" + exit 0 ;; + --config) + shift + GIT_PERF_CONFIG_FILE=$(cd "$(dirname "$1")"; pwd)/$(basename "$1") + export GIT_PERF_CONFIG_FILE + shift ;; + --subsection) + shift + GIT_PERF_SUBSECTION="$1" + export GIT_PERF_SUBSECTION + shift ;; + --*) + die "unrecognised option: '$arg'" ;; + *) + break ;; + esac +done + run_one_dir () { if test $# -eq 0; then set -- p????-*.sh @@ -24,12 +42,15 @@ run_one_dir () { unpack_git_rev () { rev=$1 + echo "=== Unpacking $rev in build/$rev ===" mkdir -p build/$rev (cd "$(git rev-parse --show-cdup)" && git archive --format=tar $rev) | (cd build/$rev && tar x) } + build_git_rev () { rev=$1 + name="$2" for config in config.mak config.mak.autogen config.status do if test -e "../../$config" @@ -37,8 +58,16 @@ build_git_rev () { cp "../../$config" "build/$rev/" fi done - (cd build/$rev && make $GIT_PERF_MAKE_OPTS) || - die "failed to build revision '$mydir'" + echo "=== Building $rev ($name) ===" + ( + cd build/$rev && + if test -n "$GIT_PERF_MAKE_COMMAND" + then + sh -c "$GIT_PERF_MAKE_COMMAND" + else + make $GIT_PERF_MAKE_OPTS + fi + ) || die "failed to build revision '$mydir'" } run_dirs_helper () { @@ -56,7 +85,7 @@ run_dirs_helper () { if [ ! -d build/$rev ]; then unpack_git_rev $rev fi - build_git_rev $rev + build_git_rev $rev "$mydir" mydir=build/$rev fi if test "$mydir" = .; then @@ -78,14 +107,118 @@ run_dirs () { done } -GIT_PERF_AGGREGATING_LATER=t -export GIT_PERF_AGGREGATING_LATER +get_subsections () { + section="$1" + test -z "$GIT_PERF_CONFIG_FILE" && return + git config -f "$GIT_PERF_CONFIG_FILE" --name-only --get-regex "$section\..*\.[^.]+" | + sed -e "s/$section\.\(.*\)\..*/\1/" | sort | uniq +} + +get_var_from_env_or_config () { + env_var="$1" + conf_sec="$2" + conf_var="$3" + conf_opts="$4" # optional + + # Do nothing if the env variable is already set + eval "test -z \"\${$env_var+x}\"" || return + + test -z "$GIT_PERF_CONFIG_FILE" && return + + # Check if the variable is in the config file + if test -n "$GIT_PERF_SUBSECTION" + then + var="$conf_sec.$GIT_PERF_SUBSECTION.$conf_var" + conf_value=$(git config $conf_opts -f "$GIT_PERF_CONFIG_FILE" "$var") && + eval "$env_var=\"$conf_value\"" && return + fi + var="$conf_sec.$conf_var" + conf_value=$(git config $conf_opts -f "$GIT_PERF_CONFIG_FILE" "$var") && + eval "$env_var=\"$conf_value\"" +} + +run_subsection () { + get_var_from_env_or_config "GIT_PERF_REPEAT_COUNT" "perf" "repeatCount" "--int" + : ${GIT_PERF_REPEAT_COUNT:=3} + export GIT_PERF_REPEAT_COUNT + + get_var_from_env_or_config "GIT_PERF_DIRS_OR_REVS" "perf" "dirsOrRevs" + set -- $GIT_PERF_DIRS_OR_REVS "$@" + + get_var_from_env_or_config "GIT_PERF_MAKE_COMMAND" "perf" "makeCommand" + get_var_from_env_or_config "GIT_PERF_MAKE_OPTS" "perf" "makeOpts" + + get_var_from_env_or_config "GIT_PERF_REPO_NAME" "perf" "repoName" + export GIT_PERF_REPO_NAME + + GIT_PERF_AGGREGATING_LATER=t + export GIT_PERF_AGGREGATING_LATER + + if test $# = 0 -o "$1" = -- -o -f "$1"; then + set -- . "$@" + fi + + codespeed_opt= + test "$GIT_PERF_CODESPEED_OUTPUT" = "true" && codespeed_opt="--codespeed" + + run_dirs "$@" + + if test -z "$GIT_PERF_SEND_TO_CODESPEED" + then + ./aggregate.perl $codespeed_opt "$@" + else + json_res_file="test-results/$GIT_PERF_SUBSECTION/aggregate.json" + ./aggregate.perl --codespeed "$@" | tee "$json_res_file" + send_data_url="$GIT_PERF_SEND_TO_CODESPEED/result/add/json/" + curl -v --request POST --data-urlencode "json=$(cat "$json_res_file")" "$send_data_url" + fi +} + +get_var_from_env_or_config "GIT_PERF_CODESPEED_OUTPUT" "perf" "codespeedOutput" "--bool" +get_var_from_env_or_config "GIT_PERF_SEND_TO_CODESPEED" "perf" "sendToCodespeed" cd "$(dirname $0)" . ../../GIT-BUILD-OPTIONS -if test $# = 0 -o "$1" = -- -o -f "$1"; then - set -- . "$@" +mkdir -p test-results +get_subsections "perf" >test-results/run_subsections.names + +if test $(wc -l <test-results/run_subsections.names) -eq 0 +then + if test -n "$GIT_PERF_SUBSECTION" + then + if test -n "$GIT_PERF_CONFIG_FILE" + then + die "no subsections are defined in config file '$GIT_PERF_CONFIG_FILE'" + else + die "subsection '$GIT_PERF_SUBSECTION' defined without a config file" + fi + fi + ( + run_subsection "$@" + ) +elif test -n "$GIT_PERF_SUBSECTION" +then + egrep "^$GIT_PERF_SUBSECTION\$" test-results/run_subsections.names >/dev/null || + die "subsection '$GIT_PERF_SUBSECTION' not found in '$GIT_PERF_CONFIG_FILE'" + + egrep "^$GIT_PERF_SUBSECTION\$" test-results/run_subsections.names | while read -r subsec + do + ( + GIT_PERF_SUBSECTION="$subsec" + export GIT_PERF_SUBSECTION + echo "======== Run for subsection '$GIT_PERF_SUBSECTION' ========" + run_subsection "$@" + ) + done +else + while read -r subsec + do + ( + GIT_PERF_SUBSECTION="$subsec" + export GIT_PERF_SUBSECTION + echo "======== Run for subsection '$GIT_PERF_SUBSECTION' ========" + run_subsection "$@" + ) + done <test-results/run_subsections.names fi -run_dirs "$@" -./aggregate.perl "$@" |