diff options
Diffstat (limited to 'perl/Git/SVN.pm')
-rw-r--r-- | perl/Git/SVN.pm | 154 |
1 files changed, 125 insertions, 29 deletions
diff --git a/perl/Git/SVN.pm b/perl/Git/SVN.pm index 152fb7e927..bc4eed3d75 100644 --- a/perl/Git/SVN.pm +++ b/perl/Git/SVN.pm @@ -98,6 +98,11 @@ sub resolve_local_globs { " globbed: $refname\n"; } my $u = (::cmt_metadata("$refname"))[0]; + if (!defined($u)) { + warn +"W: $refname: no associated commit metadata from SVN, skipping\n"; + next; + } $u =~ s!^\Q$url\E(/|$)!! or die "$refname: '$url' not found in '$u'\n"; if ($pathname ne $u) { @@ -485,7 +490,7 @@ sub refname { # # Additionally, % must be escaped because it is used for escaping # and we want our escaped refname to be reversible - $refname =~ s{([ \%~\^:\?\*\[\t])}{sprintf('%%%02X',ord($1))}eg; + $refname =~ s{([ \%~\^:\?\*\[\t\\])}{sprintf('%%%02X',ord($1))}eg; # no slash-separated component can begin with a dot . # /.* becomes /%2E* @@ -802,10 +807,15 @@ sub get_fetch_range { (++$min, $max); } +sub svn_dir { + command_oneline(qw(rev-parse --git-path svn)); +} + sub tmp_config { my (@args) = @_; - my $old_def_config = "$ENV{GIT_DIR}/svn/config"; - my $config = "$ENV{GIT_DIR}/svn/.metadata"; + my $svn_dir = svn_dir(); + my $old_def_config = "$svn_dir/config"; + my $config = "$svn_dir/.metadata"; if (! -f $config && -f $old_def_config) { rename $old_def_config, $config or die "Failed rename $old_def_config => $config: $!\n"; @@ -1211,20 +1221,87 @@ sub do_fetch { sub mkemptydirs { my ($self, $r) = @_; + # add/remove/collect a paths table + # + # Paths are split into a tree of nodes, stored as a hash of hashes. + # + # Each node contains a 'path' entry for the path (if any) associated + # with that node and a 'children' entry for any nodes under that + # location. + # + # Removing a path requires a hash lookup for each component then + # dropping that node (and anything under it), which is substantially + # faster than a grep slice into a single hash of paths for large + # numbers of paths. + # + # For a large (200K) number of empty_dir directives this reduces + # scanning time to 3 seconds vs 10 minutes for grep+delete on a single + # hash of paths. + sub add_path { + my ($paths_table, $path) = @_; + my $node_ref; + + foreach my $x (split('/', $path)) { + if (!exists($paths_table->{$x})) { + $paths_table->{$x} = { children => {} }; + } + + $node_ref = $paths_table->{$x}; + $paths_table = $paths_table->{$x}->{children}; + } + + $node_ref->{path} = $path; + } + + sub remove_path { + my ($paths_table, $path) = @_; + my $nodes_ref; + my $node_name; + + foreach my $x (split('/', $path)) { + if (!exists($paths_table->{$x})) { + return; + } + + $nodes_ref = $paths_table; + $node_name = $x; + + $paths_table = $paths_table->{$x}->{children}; + } + + delete($nodes_ref->{$node_name}); + } + + sub collect_paths { + my ($paths_table, $paths_ref) = @_; + + foreach my $v (values %$paths_table) { + my $p = $v->{path}; + my $c = $v->{children}; + + collect_paths($c, $paths_ref); + + if (defined($p)) { + push(@$paths_ref, $p); + } + } + } + sub scan { - my ($r, $empty_dirs, $line) = @_; + my ($r, $paths_table, $line) = @_; if (defined $r && $line =~ /^r(\d+)$/) { return 0 if $1 > $r; } elsif ($line =~ /^ \+empty_dir: (.+)$/) { - $empty_dirs->{$1} = 1; + add_path($paths_table, $1); } elsif ($line =~ /^ \-empty_dir: (.+)$/) { - my @d = grep {m[^\Q$1\E(/|$)]} (keys %$empty_dirs); - delete @$empty_dirs{@d}; + remove_path($paths_table, $1); } 1; # continue }; - my %empty_dirs = (); + my @empty_dirs; + my %paths_table; + my $gz_file = "$self->{dir}/unhandled.log.gz"; if (-f $gz_file) { if (!can_compress()) { @@ -1235,7 +1312,7 @@ sub mkemptydirs { die "Unable to open $gz_file: $!\n"; my $line; while ($gz->gzreadline($line) > 0) { - scan($r, \%empty_dirs, $line) or last; + scan($r, \%paths_table, $line) or last; } $gz->gzclose; } @@ -1244,13 +1321,14 @@ sub mkemptydirs { if (open my $fh, '<', "$self->{dir}/unhandled.log") { binmode $fh or croak "binmode: $!"; while (<$fh>) { - scan($r, \%empty_dirs, $_) or last; + scan($r, \%paths_table, $_) or last; } close $fh; } + collect_paths(\%paths_table, \@empty_dirs); my $strip = qr/\A\Q@{[$self->path]}\E(?:\/|$)/; - foreach my $d (sort keys %empty_dirs) { + foreach my $d (sort @empty_dirs) { $d = uri_decode($d); $d =~ s/$strip//; next unless length($d); @@ -1338,7 +1416,7 @@ sub parse_svn_date { delete $ENV{TZ}; } - my $our_TZ = get_tz_offset(); + my $our_TZ = get_tz_offset($epoch_in_UTC); # This converts $epoch_in_UTC into our local timezone. my ($sec, $min, $hour, $mday, $mon, $year, @@ -1585,7 +1663,17 @@ sub tie_for_persistent_memoization { if ($memo_backend > 0) { tie %$hash => 'Git::SVN::Memoize::YAML', "$path.yaml"; } else { - tie %$hash => 'Memoize::Storable', "$path.db", 'nstore'; + # first verify that any existing file can actually be loaded + # (it may have been saved by an incompatible version) + my $db = "$path.db"; + if (-e $db) { + use Storable qw(retrieve); + + if (!eval { retrieve($db); 1 }) { + unlink $db or die "unlink $db failed: $!"; + } + } + tie %$hash => 'Memoize::Storable', $db, 'nstore'; } } @@ -1598,7 +1686,7 @@ sub tie_for_persistent_memoization { return if $memoized; $memoized = 1; - my $cache_path = "$ENV{GIT_DIR}/svn/.caches/"; + my $cache_path = svn_dir() . '/.caches/'; mkpath([$cache_path]) unless -d $cache_path; my %lookup_svn_merge_cache; @@ -1639,7 +1727,7 @@ sub tie_for_persistent_memoization { sub clear_memoized_mergeinfo_caches { die "Only call this method in non-memoized context" if ($memoized); - my $cache_path = "$ENV{GIT_DIR}/svn/.caches/"; + my $cache_path = svn_dir() . '/.caches/'; return unless -d $cache_path; for my $cache_file (("$cache_path/lookup_svn_merge", @@ -1836,15 +1924,22 @@ sub make_log_entry { my @parents = @$parents; my $props = $ed->{dir_prop}{$self->path}; - if ( $props->{"svk:merge"} ) { - $self->find_extra_svk_parents($props->{"svk:merge"}, \@parents); - } - if ( $props->{"svn:mergeinfo"} ) { - my $mi_changes = $self->mergeinfo_changes - ($parent_path, $parent_rev, - $self->path, $rev, - $props->{"svn:mergeinfo"}); - $self->find_extra_svn_parents($mi_changes, \@parents); + if ($self->follow_parent) { + my $tickets = $props->{"svk:merge"}; + if ($tickets) { + $self->find_extra_svk_parents($tickets, \@parents); + } + + my $mergeinfo_prop = $props->{"svn:mergeinfo"}; + if ($mergeinfo_prop) { + my $mi_changes = $self->mergeinfo_changes( + $parent_path, + $parent_rev, + $self->path, + $rev, + $mergeinfo_prop); + $self->find_extra_svn_parents($mi_changes, \@parents); + } } open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!; @@ -2366,12 +2461,13 @@ sub _new { "refs/remotes/$prefix$default_ref_id"; } $_[1] = $repo_id; - my $dir = "$ENV{GIT_DIR}/svn/$ref_id"; + my $svn_dir = svn_dir(); + my $dir = "$svn_dir/$ref_id"; - # Older repos imported by us used $GIT_DIR/svn/foo instead of - # $GIT_DIR/svn/refs/remotes/foo when tracking refs/remotes/foo + # Older repos imported by us used $svn_dir/foo instead of + # $svn_dir/refs/remotes/foo when tracking refs/remotes/foo if ($ref_id =~ m{^refs/remotes/(.+)}) { - my $old_dir = "$ENV{GIT_DIR}/svn/$1"; + my $old_dir = "$svn_dir/$1"; if (-d $old_dir && ! -d $dir) { $dir = $old_dir; } @@ -2381,7 +2477,7 @@ sub _new { mkpath([$dir]); my $obj = bless { ref_id => $ref_id, dir => $dir, index => "$dir/index", - config => "$ENV{GIT_DIR}/svn/config", + config => "$svn_dir/config", map_root => "$dir/.rev_map", repo_id => $repo_id }, $class; # Ensure it gets canonicalized |