From f396f01f11208789875b61e4e0e3239b04f9e38d Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Tue, 23 May 2006 00:45:47 +1200 Subject: cvsimport: minor fixups Cleanup @skipped after it's used. Close a fhandle. Removing suspects one at a time. Signed-off-by: Martin Langhoff Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 3 +++ 1 file changed, 3 insertions(+) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 8c707f2c66..282646af35 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -650,6 +650,8 @@ my $commit = sub { "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), "git-commit-tree", $tree,@par); die "Cannot exec git-commit-tree: $!\n"; + + close OUT; } $pw->writer(); $pr->reader(); @@ -661,6 +663,7 @@ my $commit = sub { if (@skipped) { $logmsg .= "\n\n\nSKIPPED:\n\t"; $logmsg .= join("\n\t", @skipped) . "\n"; + @skipped = (); } print $pw "$logmsg\n" -- cgit v1.2.3 From c4b16f8d7786c2a9655636779ce4e3e89f0df86c Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Tue, 23 May 2006 00:45:39 +1200 Subject: cvsimport: replace anonymous sub ref with a normal sub commit() does not need to be an anonymous subreference. Keep it simple. 
Signed-off-by: Martin Langhoff Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 282646af35..d257e668d6 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -563,7 +563,7 @@ my $state = 0; my($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg); my(@old,@new,@skipped); -my $commit = sub { +sub commit { my $pid; while(@old) { my @o2; @@ -852,7 +852,7 @@ while() { } elsif($state == 9 and /^\s*$/) { $state = 10; } elsif(($state == 9 or $state == 10) and /^-+$/) { - &$commit(); + commit(); $state = 1; } elsif($state == 11 and /^-+$/) { $state = 1; @@ -862,7 +862,7 @@ while() { print "* UNKNOWN LINE * $_\n"; } } -&$commit() if $branch and $state != 11; +commit() if $branch and $state != 11; unlink($git_index); -- cgit v1.2.3 From 06918348de86774d0fad19c7076747b8182d1c74 Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Mon, 22 May 2006 23:38:08 +1200 Subject: cvsimport: introduce -L option to workaround memory leaks Signed-off-by: Martin Langhoff Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index d257e668d6..6c232c0d4b 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -29,7 +29,7 @@ use IPC::Open2; $SIG{'PIPE'}="IGNORE"; $ENV{'TZ'}="UTC"; -our($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,$opt_M,$opt_A,$opt_S); +our($opt_h,$opt_o,$opt_v,$opt_k,$opt_u,$opt_d,$opt_p,$opt_C,$opt_z,$opt_i,$opt_P, $opt_s,$opt_m,$opt_M,$opt_A,$opt_S,$opt_L); my (%conv_author_name, %conv_author_email); sub usage() { @@ -85,7 +85,7 @@ sub write_author_info($) { close ($f); } -getopts("hivmkuo:d:p:C:z:s:M:P:A:S:") or usage(); +getopts("hivmkuo:d:p:C:z:s:M:P:A:S:L:") or usage(); usage if $opt_h; @ARGV <= 
1 or usage(); @@ -719,6 +719,7 @@ sub commit { } }; +my $commitcount = 1; while() { chomp; if($state == 0 and /^-+$/) { @@ -852,6 +853,9 @@ while() { } elsif($state == 9 and /^\s*$/) { $state = 10; } elsif(($state == 9 or $state == 10) and /^-+$/) { + if ($opt_L && $commitcount++ >= $opt_L) { + last; + } commit(); $state = 1; } elsif($state == 11 and /^-+$/) { -- cgit v1.2.3 From 4adcea995e97361d0900aaf27c60fad0b03b9ad1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 May 2006 19:28:37 -0700 Subject: cvsimport: repack every kilo-commits. Signed-off-by: Linus Torvalds Acked-by: Martin Langhoff Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 6c232c0d4b..712cdc0e32 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -853,10 +853,14 @@ while() { } elsif($state == 9 and /^\s*$/) { $state = 10; } elsif(($state == 9 or $state == 10) and /^-+$/) { - if ($opt_L && $commitcount++ >= $opt_L) { + $commitcount++; + if ($opt_L && $commitcount > $opt_L) { last; } commit(); + if (($commitcount & 1023) == 0) { + system("git repack -a -d"); + } $state = 1; } elsif($state == 11 and /^-+$/) { $state = 1; -- cgit v1.2.3 From 6a1871e174fee1757713df7a3d776dd3813e7ad8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 23 May 2006 03:27:45 -0400 Subject: cvsimport: use git-update-index --index-info This should reduce the number of git-update-index forks required per commit. We now do adds/removes in one call, and we are no longer forced to deal with argv limitations. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 712cdc0e32..90ca018261 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -565,29 +565,19 @@ my($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg); my(@old,@new,@skipped); sub commit { my $pid; - while(@old) { - my @o2; - if(@old > 55) { - @o2 = splice(@old,0,50); - } else { - @o2 = @old; - @old = (); - } - system("git-update-index","--force-remove","--",@o2); - die "Cannot remove files: $?\n" if $?; - } - while(@new) { - my @n2; - if(@new > 12) { - @n2 = splice(@new,0,10); - } else { - @n2 = @new; - @new = (); - } - system("git-update-index","--add", - (map { ('--cacheinfo', @$_) } @n2)); - die "Cannot add files: $?\n" if $?; - } + + open(my $fh, '|-', qw(git-update-index -z --index-info)) + or die "unable to open git-update-index: $!"; + print $fh + (map { "0 0000000000000000000000000000000000000000\t$_\0" } + @old), + (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" } + @new) + or die "unable to write to git-update-index: $!"; + close $fh + or die "unable to write to git-update-index: $!"; + $? and die "git-update-index reported error: $?"; + @old = @new = (); $pid = open(C,"-|"); die "Cannot fork: $!" unless defined $pid; -- cgit v1.2.3 From e73aefe4fdba0d161d9878642c69b40d83a0204c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 23 May 2006 03:27:46 -0400 Subject: cvsimport: cleanup commit function This change attempts to clean up the commit function to make it a bit easier to read (or at least the first half of it). It also improves robustness and performance. 
Specifically: - report get_headref errors on opening ref unless the error is ENOENT - use regex to check for sha1 instead of length - use lexically scoped filehandles which get cleaned up automagically - check for error on both 'print' and 'close' (since output is buffered) - avoid "fork, do some perl, then exec" in commit(). It's not necessary, and we probably end up COW'ing parts of the perl process. Plus the code is much smaller because we can use open2() - avoid calling strftime over and over (mainly a readability cleanup) Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 150 +++++++++++++++++++++++------------------------------ 1 file changed, 64 insertions(+), 86 deletions(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 90ca018261..f0e4d2422b 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -23,7 +23,7 @@ use File::Basename qw(basename dirname); use Time::Local; use IO::Socket; use IO::Pipe; -use POSIX qw(strftime dup2); +use POSIX qw(strftime dup2 :errno_h); use IPC::Open2; $SIG{'PIPE'}="IGNORE"; @@ -429,22 +429,25 @@ sub getwd() { return $pwd; } +sub is_sha1 { + my $s = shift; + return $s =~ /^[a-f0-9]{40}$/; +} -sub get_headref($$) { +sub get_headref ($$) { my $name = shift; my $git_dir = shift; - my $sha; - if (open(C,"$git_dir/refs/heads/$name")) { - chomp($sha = ); - close(C); - length($sha) == 40 - or die "Cannot get head id for $name ($sha): $!\n"; + my $f = "$git_dir/refs/heads/$name"; + if(open(my $fh, $f)) { + chomp(my $r = <$fh>); + is_sha1($r) or die "Cannot get head id for $name ($r): $!"; + return $r; } - return $sha; + die "unable to open $f: $!" unless $! 
== POSIX::ENOENT; + return undef; } - -d $git_tree or mkdir($git_tree,0777) or die "Could not create $git_tree: $!"; @@ -561,90 +564,67 @@ unless($pid) { my $state = 0; -my($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg); -my(@old,@new,@skipped); -sub commit { - my $pid; - +sub update_index (\@\@) { + my $old = shift; + my $new = shift; open(my $fh, '|-', qw(git-update-index -z --index-info)) or die "unable to open git-update-index: $!"; print $fh (map { "0 0000000000000000000000000000000000000000\t$_\0" } - @old), + @$old), (map { '100' . sprintf('%o', $_->[0]) . " $_->[1]\t$_->[2]\0" } - @new) + @$new) or die "unable to write to git-update-index: $!"; close $fh or die "unable to write to git-update-index: $!"; $? and die "git-update-index reported error: $?"; - @old = @new = (); +} - $pid = open(C,"-|"); - die "Cannot fork: $!" unless defined $pid; - unless($pid) { - exec("git-write-tree"); - die "Cannot exec git-write-tree: $!\n"; - } - chomp(my $tree = ); - length($tree) == 40 - or die "Cannot get tree id ($tree): $!\n"; - close(C) +sub write_tree () { + open(my $fh, '-|', qw(git-write-tree)) + or die "unable to open git-write-tree: $!"; + chomp(my $tree = <$fh>); + is_sha1($tree) + or die "Cannot get tree id ($tree): $!"; + close($fh) or die "Error running git-write-tree: $?\n"; print "Tree ID $tree\n" if $opt_v; + return $tree; +} - my $parent = ""; - if(open(C,"$git_dir/refs/heads/$last_branch")) { - chomp($parent = ); - close(C); - length($parent) == 40 - or die "Cannot get parent id ($parent): $!\n"; - print "Parent ID $parent\n" if $opt_v; - } - - my $pr = IO::Pipe->new() or die "Cannot open pipe: $!\n"; - my $pw = IO::Pipe->new() or die "Cannot open pipe: $!\n"; - $pid = fork(); - die "Fork: $!\n" unless defined $pid; - unless($pid) { - $pr->writer(); - $pw->reader(); - open(OUT,">&STDOUT"); - dup2($pw->fileno(),0); - dup2($pr->fileno(),1); - $pr->close(); - $pw->close(); - - my @par = (); - @par = ("-p",$parent) if $parent; - 
- # loose detection of merges - # based on the commit msg - foreach my $rx (@mergerx) { - if ($logmsg =~ $rx) { - my $mparent = $1; - if ($mparent eq 'HEAD') { $mparent = $opt_o }; - if ( -e "$git_dir/refs/heads/$mparent") { - $mparent = get_headref($mparent, $git_dir); - push @par, '-p', $mparent; - print OUT "Merge parent branch: $mparent\n" if $opt_v; - } - } +my($patchset,$date,$author_name,$author_email,$branch,$ancestor,$tag,$logmsg); +my(@old,@new,@skipped); +sub commit { + update_index(@old, @new); + @old = @new = (); + my $tree = write_tree(); + my $parent = get_headref($last_branch, $git_dir); + print "Parent ID " . ($parent ? $parent : "(empty)") . "\n" if $opt_v; + + my @commit_args; + push @commit_args, ("-p", $parent) if $parent; + + # loose detection of merges + # based on the commit msg + foreach my $rx (@mergerx) { + next unless $logmsg =~ $rx && $1; + my $mparent = $1 eq 'HEAD' ? $opt_o : $1; + if(my $sha1 = get_headref($mparent, $git_dir)) { + push @commit_args, '-p', $mparent; + print "Merge parent branch: $mparent\n" if $opt_v; } - - exec("env", - "GIT_AUTHOR_NAME=$author_name", - "GIT_AUTHOR_EMAIL=$author_email", - "GIT_AUTHOR_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), - "GIT_COMMITTER_NAME=$author_name", - "GIT_COMMITTER_EMAIL=$author_email", - "GIT_COMMITTER_DATE=".strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)), - "git-commit-tree", $tree,@par); - die "Cannot exec git-commit-tree: $!\n"; - - close OUT; } - $pw->writer(); - $pr->reader(); + + my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)); + my $pid = open2(my $commit_read, my $commit_write, + 'env', + "GIT_AUTHOR_NAME=$author_name", + "GIT_AUTHOR_EMAIL=$author_email", + "GIT_AUTHOR_DATE=$commit_date", + "GIT_COMMITTER_NAME=$author_name", + "GIT_COMMITTER_EMAIL=$author_email", + "GIT_COMMITTER_DATE=$commit_date", + 'git-commit-tree', $tree, @commit_args); # compatibility with git2cvs substr($logmsg,32767) = "" if length($logmsg) > 32767; @@ -656,16 
+636,14 @@ sub commit { @skipped = (); } - print $pw "$logmsg\n" + print($commit_write "$logmsg\n") && close($commit_write) or die "Error writing to git-commit-tree: $!\n"; - $pw->close(); - print "Committed patch $patchset ($branch ".strftime("%Y-%m-%d %H:%M:%S",gmtime($date)).")\n" if $opt_v; - chomp(my $cid = <$pr>); - length($cid) == 40 - or die "Cannot get commit id ($cid): $!\n"; + print "Committed patch $patchset ($branch $commit_date)\n" if $opt_v; + chomp(my $cid = <$commit_read>); + is_sha1($cid) or die "Cannot get commit id ($cid): $!\n"; print "Commit ID $cid\n" if $opt_v; - $pr->close(); + close($commit_read); waitpid($pid,0); die "Error running git-commit-tree: $?\n" if $?; -- cgit v1.2.3 From 55cad8429954c7d08d0ce86155e7f9adf2f4c6ad Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Tue, 23 May 2006 20:08:58 +1200 Subject: cvsimport: introduce _fetchfile() method and use a 1M buffer to read() File retrieval from the socket is now moved to _fetchfile() and we now cap reads at 1MB. This should limit the memory growth of the cvsimport process. 
Signed-off-by: Martin Langhoff Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index f0e4d2422b..41ee9a608d 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -315,15 +315,7 @@ sub _line { chomp $cnt; die "Duh: Filesize $cnt" if $cnt !~ /^\d+$/; $line=""; - $res=0; - while($cnt) { - my $buf; - my $num = $self->{'socketi'}->read($buf,$cnt); - die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0; - print $fh $buf; - $res += $num; - $cnt -= $num; - } + $res = $self->_fetchfile($fh, $cnt); } elsif($line =~ s/^ //) { print $fh $line; $res += length($line); @@ -335,14 +327,7 @@ sub _line { chomp $cnt; die "Duh: Mbinary $cnt" if $cnt !~ /^\d+$/ or $cnt<1; $line=""; - while($cnt) { - my $buf; - my $num = $self->{'socketi'}->read($buf,$cnt); - die "S: Mbinary $cnt: $num: $!\n" if not defined $num or $num<=0; - print $fh $buf; - $res += $num; - $cnt -= $num; - } + $res += $self->_fetchfile($fh, $cnt); } else { chomp $line; if($line eq "ok") { @@ -384,6 +369,23 @@ sub file { return ($name, $res); } +sub _fetchfile { + my ($self, $fh, $cnt) = @_; + my $res; + my $bufsize = 1024 * 1024; + while($cnt) { + if ($bufsize > $cnt) { + $bufsize = $cnt; + } + my $buf; + my $num = $self->{'socketi'}->read($buf,$bufsize); + die "Server: Filesize $cnt: $num: $!\n" if not defined $num or $num<=0; + print $fh $buf; + $res += $num; + $cnt -= $num; + } + return $res; +} package main; -- cgit v1.2.3 From 61efa5e300386978dd440716260c94e951a493b4 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 23 May 2006 16:30:39 -0700 Subject: cvsimport: do not barf on creation of an empty file. When the server says "created this file whose length is empty", we mistakenly said "oops, the server did not say a sensible thing". Fix it. Spotted and fixed by Linus, acked by Martin. 
Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 41ee9a608d..60fc86a5be 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -371,7 +371,7 @@ sub file { } sub _fetchfile { my ($self, $fh, $cnt) = @_; - my $res; + my $res = 0; my $bufsize = 1024 * 1024; while($cnt) { if ($bufsize > $cnt) { -- cgit v1.2.3 From 62bf0d962963794e9fbcdfdd43419b060d5d245f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 23 May 2006 16:59:44 -0400 Subject: cvsimport: set up commit environment in perl instead of using env Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index 60fc86a5be..af331d9c43 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -618,14 +618,13 @@ sub commit { } my $commit_date = strftime("+0000 %Y-%m-%d %H:%M:%S",gmtime($date)); + $ENV{GIT_AUTHOR_NAME} = $author_name; + $ENV{GIT_AUTHOR_EMAIL} = $author_email; + $ENV{GIT_AUTHOR_DATE} = $commit_date; + $ENV{GIT_COMMITTER_NAME} = $author_name; + $ENV{GIT_COMMITTER_EMAIL} = $author_email; + $ENV{GIT_COMMITTER_DATE} = $commit_date; my $pid = open2(my $commit_read, my $commit_write, - 'env', - "GIT_AUTHOR_NAME=$author_name", - "GIT_AUTHOR_EMAIL=$author_email", - "GIT_AUTHOR_DATE=$commit_date", - "GIT_COMMITTER_NAME=$author_name", - "GIT_COMMITTER_EMAIL=$author_email", - "GIT_COMMITTER_DATE=$commit_date", 'git-commit-tree', $tree, @commit_args); # compatibility with git2cvs -- cgit v1.2.3 From e49289dfb788ce47af2939621540fa97abe318ae Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 24 May 2006 09:58:28 -0400 Subject: cvsimport: avoid "use" with :tag Avoid "use POSIX qw(strftime dup2 :errno_h)"; it was reported that a Perl installation on Mandrake 9.1 did not like it, even though it understood "use 
POSIX qw(:errno_h)". Funny. Signed-off-by: Junio C Hamano --- git-cvsimport.perl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'git-cvsimport.perl') diff --git a/git-cvsimport.perl b/git-cvsimport.perl index af331d9c43..76f6246a31 100755 --- a/git-cvsimport.perl +++ b/git-cvsimport.perl @@ -23,7 +23,7 @@ use File::Basename qw(basename dirname); use Time::Local; use IO::Socket; use IO::Pipe; -use POSIX qw(strftime dup2 :errno_h); +use POSIX qw(strftime dup2 ENOENT); use IPC::Open2; $SIG{'PIPE'}="IGNORE"; -- cgit v1.2.3