diff options
Diffstat (limited to 'perl/Git/SVN.pm')
-rw-r--r-- | perl/Git/SVN.pm | 109 |
1 files changed, 91 insertions, 18 deletions
diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index f24372645b..d94d01cfdc 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -9,10 +9,8 @@ use vars qw/$_no_metadata $_use_log_author $_add_author_from $_localtime/; use Carp qw/croak/; use File::Path qw/mkpath/; -use File::Copy qw/copy/; use IPC::Open3; use Memoize; # core since 5.8.0, Jul 2002 -use Memoize::Storable; use POSIX qw(:signal_h); use Time::Local; @@ -33,11 +31,7 @@ use Git::SVN::Utils qw( add_path_to_url ); -my $can_use_yaml; -BEGIN { - $can_use_yaml = eval { require Git::SVN::Memoize::YAML; 1}; -} - +my $memo_backend; our $_follow_parent = 1; our $_minimize_url = 'unset'; our $default_repo_id = 'svn'; @@ -103,7 +97,8 @@ sub resolve_local_globs { "existing: $existing\n", " globbed: $refname\n"; } - my $u = (::cmt_metadata("$refname"))[0]; + my $u = (::cmt_metadata("$refname"))[0] or die + "$refname: no associated commit metadata\n"; $u =~ s!^\Q$url\E(/|$)!! or die "$refname: '$url' not found in '$u'\n"; if ($pathname ne $u) { @@ -1217,20 +1212,87 @@ sub do_fetch { sub mkemptydirs { my ($self, $r) = @_; + # add/remove/collect a paths table + # + # Paths are split into a tree of nodes, stored as a hash of hashes. + # + # Each node contains a 'path' entry for the path (if any) associated + # with that node and a 'children' entry for any nodes under that + # location. + # + # Removing a path requires a hash lookup for each component then + # dropping that node (and anything under it), which is substantially + # faster than a grep slice into a single hash of paths for large + # numbers of paths. + # + # For a large (200K) number of empty_dir directives this reduces + # scanning time to 3 seconds vs 10 minutes for grep+delete on a single + # hash of paths. + sub add_path { + my ($paths_table, $path) = @_; + my $node_ref; + + foreach my $x (split('/', $path)) { + if (!exists($paths_table->{$x})) { + $paths_table->{$x} = { children => {} }; + } + + $node_ref = $paths_table->{$x}; + $paths_table = $paths_table->{$x}->{children}; + } + + $node_ref->{path} = $path; + } + + sub remove_path { + my ($paths_table, $path) = @_; + my $nodes_ref; + my $node_name; + + foreach my $x (split('/', $path)) { + if (!exists($paths_table->{$x})) { + return; + } + + $nodes_ref = $paths_table; + $node_name = $x; + + $paths_table = $paths_table->{$x}->{children}; + } + + delete($nodes_ref->{$node_name}); + } + + sub collect_paths { + my ($paths_table, $paths_ref) = @_; + + foreach my $v (values %$paths_table) { + my $p = $v->{path}; + my $c = $v->{children}; + + collect_paths($c, $paths_ref); + + if (defined($p)) { + push(@$paths_ref, $p); + } + } + } + sub scan { - my ($r, $empty_dirs, $line) = @_; + my ($r, $paths_table, $line) = @_; if (defined $r && $line =~ /^r(\d+)$/) { return 0 if $1 > $r; } elsif ($line =~ /^ \+empty_dir: (.+)$/) { - $empty_dirs->{$1} = 1; + add_path($paths_table, $1); } elsif ($line =~ /^ \-empty_dir: (.+)$/) { - my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs); - delete @$empty_dirs{@d}; + remove_path($paths_table, $1); } 1; # continue }; - my %empty_dirs = (); + my @empty_dirs; + my %paths_table; + my $gz_file = "$self->{dir}/unhandled.log.gz"; if (-f $gz_file) { if (!can_compress()) { @@ -1241,7 +1303,7 @@ sub mkemptydirs { die "Unable to open $gz_file: $!\n"; my $line; while ($gz->gzreadline($line) > 0) { - scan($r, \%empty_dirs, $line) or last; + scan($r, \%paths_table, $line) or last; } $gz->gzclose; } @@ -1250,13 +1312,14 @@ sub mkemptydirs { if (open my $fh, '<', "$self->{dir}/unhandled.log") { binmode $fh or croak "binmode: $!"; while (<$fh>) { - scan($r, \%empty_dirs, $_) or last; + scan($r, \%paths_table, $_) or last; } close $fh; } + collect_paths(\%paths_table, \@empty_dirs); my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/; - foreach my $d (sort keys %empty_dirs) { + foreach my $d (sort @empty_dirs) { $d = uri_decode($d); $d =~ s/$strip//; next unless length($d); @@ -1579,7 +1642,16 @@ sub tie_for_persistent_memoization { my $hash = shift; my $path = shift; - if ($can_use_yaml) { + unless ($memo_backend) { + if (eval { require Git::SVN::Memoize::YAML; 1}) { + $memo_backend = 1; + } else { + require Memoize::Storable; + $memo_backend = -1; + } + } + + if ($memo_backend > 0) { tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml"; } else { tie %$hash => 'Memoize::Storable', "$path.db", 'nstore'; @@ -2189,8 +2261,9 @@ sub rev_map_set { # both of these options make our .rev_db file very, very important # and we can't afford to lose it because rebuild() won't work if ($self->use_svm_props || $self->no_metadata) { + require File::Copy; $sync = 1; - copy($db, $db_lock) or die "rev_map_set(@_): ", + File::Copy::copy($db, $db_lock) or die "rev_map_set(@_): ", "Failed to copy: ", "$db => $db_lock ($!)\n"; } else { |