From 9ae9ca1f95ccd3b4c1d62d856eb8b48fdff7c2d5 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Fri, 9 Aug 2013 17:39:54 -0400 Subject: contacts: validate hunk length earlier Rather than calling get_blame() with a zero-length hunk only to have it rejected immediately, perform hunk-length validation earlier in order to avoid calling get_blame() unnecessarily. This is a preparatory step to simplify later patches which reduce the number of git-blame invocations by collecting together all lines to blame within a single file at a particular revision. By validating the blame range early, the subsequent patch can more easily avoid adding empty ranges at collection time. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'contrib/contacts/git-contacts') diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts index d80f7d1b6e..4fbb2ef85a 100755 --- a/contrib/contacts/git-contacts +++ b/contrib/contacts/git-contacts @@ -60,8 +60,6 @@ sub import_commits { sub get_blame { my ($commits, $source, $start, $len, $from) = @_; - $len = 1 unless defined($len); - return if $len == 0; open my $f, '-|', qw(git blame --porcelain -C), '-L', "$start,+$len", '--since', $since, "$from^", '--', $source or die; @@ -90,7 +88,8 @@ sub scan_patches { } elsif (/^--- /) { die "Cannot parse hunk source: $_\n"; } elsif (/^@@ -(\d+)(?:,(\d+))?/ && $source) { - get_blame($commits, $source, $1, $2, $id); + my $len = defined($2) ? $2 : 1; + get_blame($commits, $source, $1, $len, $id) if $len; } } } -- cgit v1.2.3 From db8cae7e6035dd56970c88ac9e34bce1baa0f665 Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Fri, 9 Aug 2013 17:39:55 -0400 Subject: contacts: gather all blame sources prior to invoking git-blame git-contacts invokes git-blame immediately upon encountering a patch hunk. 
No attempt is made to consolidate invocations for multiple hunks referencing the same file at the same revision. This can become expensive quickly. Any effort to reduce the number of times git-blame is run will need to know in advance which line ranges to blame per file per revision. Make this information available by collecting all sources as a distinct step from invoking git-blame. A subsequent patch will utilize the information to optimize git-blame invocations. Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) (limited to 'contrib/contacts/git-contacts') diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts index 4fbb2ef85a..b4d3526a46 100755 --- a/contrib/contacts/git-contacts +++ b/contrib/contacts/git-contacts @@ -74,8 +74,20 @@ sub get_blame { close $f; } +sub blame_sources { + my ($sources, $commits) = @_; + for my $s (keys %$sources) { + for my $id (keys %{$sources->{$s}}) { + for my $range (@{$sources->{$s}{$id}}) { + get_blame($commits, $s, + $range->[0], $range->[1], $id); + } + } + } +} + sub scan_patches { - my ($commits, $id, $f) = @_; + my ($sources, $id, $f) = @_; my $source; while (<$f>) { if (/^From ([0-9a-f]{40}) Mon Sep 17 00:00:00 2001$/) { @@ -89,7 +101,7 @@ sub scan_patches { die "Cannot parse hunk source: $_\n"; } elsif (/^@@ -(\d+)(?:,(\d+))?/ && $source) { my $len = defined($2) ? 
$2 : 1; - get_blame($commits, $source, $1, $len, $id) if $len; + push @{$sources->{$source}{$id}}, [$1, $len] if $len; } } } @@ -162,13 +174,16 @@ for (@ARGV) { } } -my %commits; +my %sources; for (@files) { - scan_patch_file(\%commits, $_); + scan_patch_file(\%sources, $_); } if (@rev_args) { - scan_rev_args(\%commits, \@rev_args) + scan_rev_args(\%sources, \@rev_args) } + +my %commits; +blame_sources(\%sources, \%commits); import_commits(\%commits); my $contacts = {}; -- cgit v1.2.3 From 4c70cfbfbc2da8d0e4f94b73e37b4020704a498b Mon Sep 17 00:00:00 2001 From: Eric Sunshine Date: Fri, 9 Aug 2013 17:39:56 -0400 Subject: contacts: reduce git-blame invocations git-contacts invokes git-blame once for each patch hunk it encounters. No attempt is made to consolidate invocations for multiple hunks referencing the same file at the same revision. This can become expensive quickly. Reduce the number of git-blame invocations by taking advantage of the ability to specify multiple -L ranges for a single invocation. 
Without this patch, on a randomly chosen range of commits: % time git-contacts 25fba78d36be6297^..23c339c0f262aad2 >/dev/null real 0m6.142s user 0m5.429s sys 0m0.356s With this patch: % time git-contacts 25fba78d36be6297^..23c339c0f262aad2 >/dev/null real 0m2.285s user 0m2.093s sys 0m0.165s Signed-off-by: Eric Sunshine Signed-off-by: Junio C Hamano --- contrib/contacts/git-contacts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'contrib/contacts/git-contacts') diff --git a/contrib/contacts/git-contacts b/contrib/contacts/git-contacts index b4d3526a46..fb6429b64b 100755 --- a/contrib/contacts/git-contacts +++ b/contrib/contacts/git-contacts @@ -59,9 +59,11 @@ sub import_commits { } sub get_blame { - my ($commits, $source, $start, $len, $from) = @_; + my ($commits, $source, $from, $ranges) = @_; + return unless @$ranges; open my $f, '-|', - qw(git blame --porcelain -C), '-L', "$start,+$len", + qw(git blame --porcelain -C), + map({"-L$_->[0],+$_->[1]"} @$ranges), '--since', $since, "$from^", '--', $source or die; while (<$f>) { if (/^([0-9a-f]{40}) \d+ \d+ \d+$/) { @@ -78,10 +80,7 @@ sub blame_sources { my ($sources, $commits) = @_; for my $s (keys %$sources) { for my $id (keys %{$sources->{$s}}) { - for my $range (@{$sources->{$s}{$id}}) { - get_blame($commits, $s, - $range->[0], $range->[1], $id); - } + get_blame($commits, $s, $id, $sources->{$s}{$id}); } } } -- cgit v1.2.3