package Git::SVN::Ra;
use vars qw/@ISA $config_dir $_ignore_refs_regex $_log_window_size/;
use strict;
use warnings;
use Memoize;
use Git::SVN::Utils qw(
    canonicalize_url
    canonicalize_path
    add_path_to_url
);

use SVN::Ra;
BEGIN {
    @ISA = qw(SVN::Ra);
}

# File-scoped state:
#   $ra_invalid    - set when the cached connection should be reopened
#   $can_do_switch - memoized result of can_do_switch()
#   %ignored_err   - SVN errors already warned about by skip_unknown_revs()
#   $RA            - the single cached Git::SVN::Ra object (see new())
my ($ra_invalid, $can_do_switch, %ignored_err, $RA);

BEGIN {
    # enforce temporary pool usage for some simple functions
    #
    # Each listed method is overridden with a wrapper that calls the
    # SVN::Ra implementation with a fresh SVN::Pool appended to the
    # arguments and clears that pool before returning.
    no strict 'refs';
    for my $f (qw/rev_proplist get_latest_revnum get_uuid get_repos_root
                  get_file/) {
        my $SUPER = "SUPER::$f";
        *$f = sub {
            my $self = shift;
            my $pool = SVN::Pool->new;
            my @ret = $self->$SUPER(@_, $pool);
            $pool->clear;
            return wantarray ? @ret : $ret[0];
        };
    }
}

# serf has a bug that leads to a coredump upon termination if the
# remote access object is left around (not fixed yet in serf 1.3.1).
# Explicitly free it to work around the issue.
END {
    $RA = undef;
    $ra_invalid = 1;
}

# Build the list of SVN authentication providers.
#
# Returns an array reference.  On SVN >= 1.6.15 the platform-specific
# client providers are placed in front of the generic ones.
# The result is memoized (see the INIT block below).
sub _auth_providers () {
    require SVN::Client;
    my @rv = (
        SVN::Client::get_simple_provider(),
        SVN::Client::get_ssl_server_trust_file_provider(),
        SVN::Client::get_simple_prompt_provider(
            \&Git::SVN::Prompt::simple, 2),
        SVN::Client::get_ssl_client_cert_file_provider(),
        SVN::Client::get_ssl_client_cert_prompt_provider(
            \&Git::SVN::Prompt::ssl_client_cert, 2),
        SVN::Client::get_ssl_client_cert_pw_file_provider(),
        SVN::Client::get_ssl_client_cert_pw_prompt_provider(
            \&Git::SVN::Prompt::ssl_client_cert_pw, 2),
        SVN::Client::get_username_provider(),
        SVN::Client::get_ssl_server_trust_prompt_provider(
            \&Git::SVN::Prompt::ssl_server_trust),
        SVN::Client::get_username_prompt_provider(
            \&Git::SVN::Prompt::username, 2)
    );

    # earlier 1.6.x versions would segfault, and <= 1.5.x didn't have
    # this function
    if (::compare_svn_version('1.6.15') >= 0) {
        my $config = SVN::Core::config_get_config($config_dir);
        my ($p, @a);
        # config_get_config returns all config files from
        # ~/.subversion, auth_get_platform_specific_client_providers
        # just wants the config "file".
        @a = ($config->{'config'}, undef);
        $p = SVN::Core::auth_get_platform_specific_client_providers(@a);
        # Insert the return value from
        # auth_get_platform_specific_providers
        unshift @rv, @$p;
    }
    return \@rv;
}

# Load the SVN configuration from $config_dir and open the auth baton.
#
# Honors the store-passwords and store-auth-creds options of the auth
# section of the SVN config.
# Returns ($config, $baton, $callbacks).
# The result is memoized (see the INIT block below).
sub prepare_config_once {
    SVN::_Core::svn_config_ensure($config_dir, undef);
    my ($baton, $callbacks) = SVN::Core::auth_open_helper(_auth_providers());
    my $config = SVN::Core::config_get_config($config_dir);
    my $conf_t = $config->{'config'};

    no warnings 'once';
    # Using the $SVN::_Core::SVN_CONFIG_* variables produces "used only
    # once" warnings.  No better way to silence them was found, so
    # warnings of type 'once' are disabled in this block.
    if (SVN::_Core::svn_config_get_bool($conf_t,
        $SVN::_Core::SVN_CONFIG_SECTION_AUTH,
        $SVN::_Core::SVN_CONFIG_OPTION_STORE_PASSWORDS,
        1) == 0) {
        my $val = '1';
        if (::compare_svn_version('1.9.0') < 0) { # pre-SVN r1553823
            my $dont_store_passwords = 1;
            $val = bless \$dont_store_passwords, "_p_void";
        }
        SVN::_Core::svn_auth_set_parameter($baton,
            $SVN::_Core::SVN_AUTH_PARAM_DONT_STORE_PASSWORDS,
            $val);
    }
    if (SVN::_Core::svn_config_get_bool($conf_t,
        $SVN::_Core::SVN_CONFIG_SECTION_AUTH,
        $SVN::_Core::SVN_CONFIG_OPTION_STORE_AUTH_CREDS,
        1) == 0) {
        $Git::SVN::Prompt::_no_auth_cache = 1;
    }

    return ($config, $baton, $callbacks);
} # no warnings 'once'

INIT {
    Memoize::memoize '_auth_providers';
    Memoize::memoize 'prepare_config_once';
}

# Constructor: Git::SVN::Ra->new($url)
#
# Only one connection object is kept at a time (in $RA).  If the cached
# object already points at the canonicalized $url it is returned as-is;
# otherwise the cached object is dropped and a new one is opened.
sub new {
    my ($class, $url) = @_;
    $url = canonicalize_url($url);
    return $RA if ($RA && $RA->url eq $url);

    ::_req_svn();

    $RA = undef;
    my ($config, $baton, $callbacks) = prepare_config_once();
    my $self = SVN::Ra->new(url => $url, auth => $baton,
                            config => $config,
                            pool => SVN::Pool->new,
                            auth_provider_callbacks => $callbacks);
    $RA = bless $self, $class;

    # Make sure it's canonicalized
    $self->url($url);
    $self->{svn_path} = $url;
    $self->{repos_root} = $self->get_repos_root;
    # svn_path is the part of the URL below the repository root
    $self->{svn_path} =~ s#^\Q$self->{repos_root}\E(/|$)##;
    # Per-revision caches used by check_path() and get_dir()
    $self->{cache} = { check_path => { r => 0, data => {} },
                       get_dir    => { r => 0, data => {} } };

    return $RA;
}

# Accessor for the connection URL.
#
# With an argument: stores the canonicalized URL and returns nothing.
# Without an argument: returns the stored URL.
sub url {
    my $self = shift;

    if (@_) {
        my $url = shift;
        $self->{url} = canonicalize_url($url);
        return;
    }

    return $self->{url};
}

# Cached wrapper around SVN::Ra::check_path($path, $r).
#
# The cache only holds results for a single revision; asking about a
# different revision empties it first.
sub check_path {
    my ($self, $path, $r) = @_;
    my $cache = $self->{cache}->{check_path};
    if ($r == $cache->{r} && exists $cache->{data}->{$path}) {
        return $cache->{data}->{$path};
    }
    my $pool = SVN::Pool->new;
    my $t = $self->SUPER::check_path($path, $r, $pool);
    $pool->clear;
    if ($r != $cache->{r}) {
        %{$cache->{data}} = ();
        $cache->{r} = $r;
    }
    return $cache->{data}->{$path} = $t;
}

# Cached directory listing for $dir at revision $r.
#
# List context:   (\%dirents, $r, $props)
# Scalar context: \%dirents
# Each value in %dirents is a hash ref holding only the entry's "kind".
# As with check_path(), the cache covers a single revision at a time.
sub get_dir {
    my ($self, $dir, $r) = @_;
    my $cache = $self->{cache}->{get_dir};
    if ($r == $cache->{r}) {
        if (my $x = $cache->{data}->{$dir}) {
            return wantarray ? @$x : $x->[0];
        }
    }
    my $pool = SVN::Pool->new;
    my ($d, undef, $props);

    if (::compare_svn_version('1.4.0') >= 0) {
        # n.b. in addition to being potentially more efficient,
        # this works around what appears to be a bug in some
        # SVN 1.8 versions
        my $kind = 1; # SVN_DIRENT_KIND
        ($d, undef, $props) = $self->get_dir2($dir, $r, $kind, $pool);
    } else {
        ($d, undef, $props) = $self->SUPER::get_dir($dir, $r, $pool);
    }
    # Copy the "kind" of every entry out of the pool-backed objects
    # before the pool is cleared.
    my %dirents = map { $_ => { kind => $d->{$_}->kind } } keys %$d;
    $pool->clear;
    if ($r != $cache->{r}) {
        %{$cache->{data}} = ();
        $cache->{r} = $r;
    }
    $cache->{data}->{$dir} = [ \%dirents, $r, $props ];
    return wantarray ? (\%dirents, $r, $props) : \%dirents;
}

# get_log(paths, start, end, limit,
#         discover_changed_paths, strict_node_history, receiver)
#
# Wraps SVN::Ra::get_log.  The receiver callback is wrapped so that the
# changed-paths hash it sees contains plain Perl hashes (keys
# copyfrom_path, copyfrom_rev, action) with paths made relative to this
# connection's URL rather than to the repository root.
sub get_log {
    my ($self, @args) = @_;
    my $pool = SVN::Pool->new;

    # svn_log_changed_path_t objects passed to get_log are likely to be
    # overwritten even if only the refs are copied to an external variable,
    # so we should dup the structures in their entirety.  Using an
    # externally passed pool (instead of our temporary and quickly cleared
    # pool in Git::SVN::Ra) does not help matters at all...
    my $receiver = pop @args;
    my $prefix = "/".$self->{svn_path};
    $prefix =~ s#/+($)##;
    my $prefix_regex = qr#^\Q$prefix\E#;
    push(@args, sub {
        my ($paths) = $_[0];
        return $receiver->(@_) unless $paths;
        # Replace the first argument in place; the loop below fills it
        # with the duplicated entries.
        $_[0] = ();
        foreach my $p (keys %$paths) {
            my $i = $paths->{$p};
            # Make path relative to our url, not repos_root
            $p =~ s/$prefix_regex//;
            my %s = map { $_ => $i->$_; } qw/copyfrom_path
                copyfrom_rev action/;
            if ($s{'copyfrom_path'}) {
                $s{'copyfrom_path'} =~ s/$prefix_regex//;
                $s{'copyfrom_path'} = canonicalize_path($s{'copyfrom_path'});
            }
            $_[0]{$p} = \%s;
        }
        $receiver->(@_);
    });

    # the limit parameter was not supported in SVN 1.1.x, so we
    # drop it.  Therefore, the receiver callback passed to it
    # is made aware of this limitation by being wrapped if
    # the limit passed to is being wrapped.
    #
    # NOTE(review): "<= 0" also matches SVN 1.2.0 itself, while other
    # 1.2.0 feature checks in this file use ">= 0" -- confirm whether
    # "< 0" was intended.
    if (::compare_svn_version('1.2.0') <= 0) {
        my $limit = splice(@args, 3, 1);
        if ($limit > 0) {
            my $receiver = pop @args;
            push(@args, sub { $receiver->(@_) if (--$limit >= 0) });
        }
    }
    my $ret = $self->SUPER::get_log(@args, $pool);
    $pool->clear;
    return $ret;
}

# uncommon, only for ancient SVN (<= 1.4.2)
#
# Diffs $url1@$rev1 against $url2@$rev2 into a temporary file and
# returns true when the diff output is empty.
# Marks the cached connection as invalid.
sub trees_match {
    require IO::File;
    require SVN::Client;
    require Carp;
    my ($self, $url1, $rev1, $url2, $rev2) = @_;
    my $ctx = SVN::Client->new(auth => _auth_providers());
    my $out = IO::File->new_tmpfile;

    # older SVN (1.1.x) doesn't take $pool as the last parameter for
    # $ctx->diff(), so we'll create a default one
    my $pool = SVN::Pool->new_default_sub;

    $ra_invalid = 1; # this will open a new SVN::Ra connection to $url1
    $ctx->diff([], $url1, $rev1, $url2, $rev2, 1, 1, 0, $out, $out);
    $out->flush;
    my $ret = (($out->stat)[7] == 0); # element 7 of stat() is the size
    # Carp is not imported at file level, so call croak fully qualified.
    close $out or Carp::croak($!);

    return $ret;
}

# Wrapper around SVN::Ra::get_commit_editor that supplies the extra
# (undef, 0) arguments expected from SVN 1.2.0 on.
sub get_commit_editor {
    my ($self, $log, $cb, $pool) = @_;

    my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef, 0) : ();
    return $self->SUPER::get_commit_editor($log, $cb, @lock, $pool);
}

# Drive $editor with an update of $gs->path from $rev_a to $rev_b.
#
# When $rev_a == $rev_b the path is reported as new and an existing
# index file ($gs->{index}) is removed first.
# Returns $editor->{git_commit_ok}.
sub gs_do_update {
    my ($self, $rev_a, $rev_b, $gs, $editor) = @_;
    my $new = ($rev_a == $rev_b);
    my $path = $gs->path;

    if ($new && -e $gs->{index}) {
        unlink $gs->{index} or die
            "Couldn't unlink index: $gs->{index}: $!\n";
    }
    my $pool = SVN::Pool->new;
    $editor->set_path_strip($path);
    my (@pc) = split m#/#, $path;
    my $reporter = $self->do_update($rev_b, (@pc ? shift @pc : ''),
                                    1, $editor, $pool);
    my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();

    # Since we can't rely on svn_ra_reparent being available, we'll
    # just have to do some magic with set_path to make it so
    # we only want a partial path.
    my $sp = '';
    my $final = join('/', @pc);
    while (@pc) {
        $reporter->set_path($sp, $rev_b, 0, @lock, $pool);
        $sp .= '/' if length $sp;
        $sp .= shift @pc;
    }
    die "BUG: '$sp' != '$final'\n" if ($sp ne $final);

    $reporter->set_path($sp, $rev_a, $new, @lock, $pool);

    $reporter->finish_report($pool);
    $pool->clear;
    return $editor->{git_commit_ok};
}

# this requires SVN 1.4.3 or later (do_switch didn't work before 1.4.3, and
# svn_ra_reparent didn't work before 1.4)
#
# Drive $editor with a switch of $gs->path from $rev_a to $url_b@$rev_b.
# Depending on the URL scheme either a new connection is opened for the
# full URL or the current session is temporarily reparented.
# Returns $editor->{git_commit_ok}.
sub gs_do_switch {
    my ($self, $rev_a, $rev_b, $gs, $url_b, $editor) = @_;
    my $path = $gs->path;
    my $pool = SVN::Pool->new;

    my $old_url = $self->url;
    my $full_url = add_path_to_url( $self->url, $path );
    my ($ra, $reparented);

    if ($old_url =~ m#^svn(\+\w+)?://# ||
        ($full_url =~ m#^https?://# &&
         canonicalize_url($full_url) ne $full_url)) {
        # Drop every reference to the old connection, including the
        # caller's variable (elements of @_ alias the caller's
        # arguments), then open a new connection for the full URL.
        $_[0] = undef;
        $self = undef;
        $RA = undef;
        $ra = Git::SVN::Ra->new($full_url);
        $ra_invalid = 1;
    } elsif ($old_url ne $full_url) {
        SVN::_Ra::svn_ra_reparent(
            $self->{session},
            canonicalize_url($full_url),
            $pool
        );
        $self->url($full_url);
        $reparented = 1;
    }

    $ra ||= $self;
    $url_b = canonicalize_url($url_b);
    my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool);
    my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();
    $reporter->set_path('', $rev_a, 0, @lock, $pool);
    $reporter->finish_report($pool);

    if ($reparented) {
        # Put the session back where it was
        SVN::_Ra::svn_ra_reparent($self->{session}, $old_url, $pool);
        $self->url($old_url);
    }

    $pool->clear;
    return $editor->{git_commit_ok};
}

# Return the longest leading path shared by every $gs->path in @$gsv
# and every $glob->{path}->{left} in @$globs ('' when there is none).
sub longest_common_path {
    my ($gsv, $globs) = @_;
    my %common;
    my $common_max = scalar @$gsv;

    # Count, for every leading path prefix, how many inputs have it
    foreach my $gs (@$gsv) {
        my @tmp = split m#/#, $gs->path;
        my $p = '';
        foreach (@tmp) {
            $p .= length($p) ? "/$_" : $_;
            $common{$p} ||= 0;
            $common{$p}++;
        }
    }
    $globs ||= [];
    $common_max += scalar @$globs;
    foreach my $glob (@$globs) {
        my @tmp = split m#/#, $glob->{path}->{left};
        my $p = '';
        foreach (@tmp) {
            $p .= length($p) ? "/$_" : $_;
            $common{$p} ||= 0;
            $common{$p}++;
        }
    }

    # The answer is the longest prefix that every input has
    my $longest_path = '';
    foreach (sort {length $b <=> length $a} keys %common) {
        if ($common{$_} == $common_max) {
            $longest_path = $_;
            last;
        }
    }
    return $longest_path;
}

# Pack the arguments of a log receiver callback into the
# [ $paths, { author, date, log } ] form stored per revision by
# gs_fetch_loop_common().
sub _cb {
    my ($paths, $r, $author, $date, $log) = @_;
    return [ $paths,
             { author => $author, date => $date, log => $log } ];
}

# Main fetch loop: walk revisions $base..$head in windows of
# $_log_window_size, and for every logged revision fetch and commit it
# into each matching Git::SVN object from @$gsv / @$globs.
sub gs_fetch_loop_common {
    my ($self, $base, $head, $gsv, $globs) = @_;
    return if ($base > $head);
    # Make sure the cat_blob open2 FileHandle is created before calling
    # SVN::Pool::new_default so that it does not incorrectly end up in the pool.
    $::_repository->_open_cat_blob_if_needed;
    my $gpool = SVN::Pool->new_default;
    my $ra_url = $self->url;
    # Drop the current connection and open a fresh one to the same URL
    my $reload_ra = sub {
        $_[0] = undef;
        $self = undef;
        $RA = undef;
        $gpool->clear;
        $self = Git::SVN::Ra->new($ra_url);
        $ra_invalid = undef;
    };
    my $inc = $_log_window_size;
    my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc);
    my $longest_path = longest_common_path($gsv, $globs);
    my $find_trailing_edge;
    while (1) {
        my %revs;
        my $err;
        # Temporarily install an error handler that records the error
        # and lets skip_unknown_revs() decide whether it is fatal.
        my $err_handler = $SVN::Error::handler;
        $SVN::Error::handler = sub {
            ($err) = @_;
            skip_unknown_revs($err);
        };
        $self->get_log([$longest_path], $min, $max, 0, 1, 1,
                       sub { $revs{$_[1]} = _cb(@_) });
        if ($err) {
            print "Checked through r$max\r";
        } else {
            $find_trailing_edge = 1;
        }
        if ($err and $find_trailing_edge) {
            print STDERR "Path '$longest_path' ",
                         "was probably deleted:\n",
                         $err->expanded_message,
                         "\nWill attempt to follow ",
                         "revisions r$min .. r$max ",
                         "committed before the deletion\n";
            # Shrink the upper bound until get_log reports a revision
            my $hi = $max;
            while (--$hi >= $min) {
                my $ok;
                $self->get_log([$longest_path], $min, $hi,
                               0, 1, 1, sub {
                                   $ok = $_[1];
                                   $revs{$_[1]} = _cb(@_) });
                if ($ok) {
                    print STDERR "r$min .. r$ok OK\n";
                    last;
                }
            }
            $find_trailing_edge = 0;
        }
        $SVN::Error::handler = $err_handler;

        my %exists = map { $_->path => $_ } @$gsv;
        foreach my $r (sort {$a <=> $b} keys %revs) {
            my ($paths, $logged) = @{delete $revs{$r}};

            foreach my $gs ($self->match_globs(\%exists, $paths,
                                               $globs, $r)) {
                if ($gs->rev_map_max >= $r) {
                    next;
                }
                next unless $gs->match_paths($paths, $r);
                $gs->{logged_rev_props} = $logged;
                if (my $last_commit = $gs->last_commit) {
                    $gs->assert_index_clean($last_commit);
                }
                my $log_entry = $gs->do_fetch($paths, $r);
                if ($log_entry) {
                    $gs->do_git_commit($log_entry);
                }
                $Git::SVN::INDEX_FILES{$gs->{index}} = 1;
            }
            foreach my $g (@$globs) {
                my $k = "svn-remote.$g->{remote}." .
                        "$g->{t}-maxRev";
                Git::SVN::tmp_config($k, $r);
            }
            $reload_ra->() if $ra_invalid;
        }
        # pre-fill the .rev_db since it'll eventually get filled in
        # with '0' x40 if something new gets committed
        foreach my $gs (@$gsv) {
            next if $gs->rev_map_max >= $max;
            next if defined $gs->rev_map_get($max);
            $gs->rev_map_set($max, 0 x40);
        }
        foreach my $g (@$globs) {
            my $k = "svn-remote.$g->{remote}.$g->{t}-maxRev";
            Git::SVN::tmp_config($k, $max);
        }
        last if $max >= $head;
        $min = $max + 1;
        $max += $inc;
        $max = $head if ($max > $head);

        $reload_ra->();
    }
    Git::SVN::gc();
}

# List the directories found $depth levels below $left at revision $r,
# as paths relative to $left.  Returns an empty list when get_dir()
# fails (the error is swallowed by eval).
sub get_dir_globbed {
    my ($self, $left, $depth, $r) = @_;

    my @x = eval { $self->get_dir($left, $r) };
    return unless scalar @x == 3;
    my $dirents = $x[0];
    my @finalents;
    foreach my $de (keys %$dirents) {
        next if $dirents->{$de}->{kind} != $SVN::Node::dir;
        if ($depth > 1) {
            my @args = ("$left/$de", $depth - 1, $r);
            foreach my $dir ($self->get_dir_globbed(@args)) {
                push @finalents, "$de/$dir";
            }
        } else {
            push @finalents, $de;
        }
    }
    return @finalents;
}

# return value: 0 -- don't ignore, 1 -- ignore
#
# The ref name built from glob $g and path $p is checked against the
# glob's own ignore_refs_regex first, then against the package-wide
# $_ignore_refs_regex.
sub is_ref_ignored {
    my ($g, $p) = @_;
    my $refname = $g->{ref}->full_path($p);
    return 1 if defined($g->{ignore_refs_regex}) &&
                $refname =~ m!$g->{ignore_refs_regex}!;
    return 0 unless defined($_ignore_refs_regex);
    return 1 if $refname =~ m!$_ignore_refs_regex!o;
    return 0;
}

# Helper for match_globs(): list the directories matched by glob $g at
# revision $r and register a Git::SVN object in %$exists for each one
# that is not present yet.
sub get_dir_check {
    my ($self, $exists, $g, $r) = @_;

    my @dirs = $self->get_dir_globbed($g->{path}->{left},
                                      $g->{path}->{depth},
                                      $r);

    foreach my $de (@dirs) {
        my $p = $g->{path}->full_path($de);
        next if $exists->{$p};
        next if (length $g->{path}->{right} &&
                 ($self->check_path($p, $r) !=
                  $SVN::Node::dir));
        next unless $p =~ /$g->{path}->{regex}/;
        $exists->{$p} = Git::SVN->init($self->url, $p, undef,
                                       $g->{ref}->full_path($de), 1);
    }
}

# Extend %$exists with Git::SVN objects for paths that match one of
# @$globs and appear in the changed-paths hash %$paths of revision $r.
# Returns the values of %$exists.
sub match_globs {
    my ($self, $exists, $paths, $globs, $r) = @_;

    foreach my $g (@$globs) {
        # The glob's left-hand directory itself was added or replaced
        if (my $path = $paths->{"/$g->{path}->{left}"}) {
            if ($path->{action} =~ /^[AR]$/) {
                get_dir_check($self, $exists, $g, $r);
            }
        }
        foreach (keys %$paths) {
            if (/$g->{path}->{left_regex}/ &&
                !/$g->{path}->{regex}/) {
                next if $paths->{$_}->{action} !~ /^[AR]$/;
                get_dir_check($self, $exists, $g, $r);
            }
            next unless /$g->{path}->{regex}/;
            my $p = $1;
            my $pathname = $g->{path}->full_path($p);
            next if is_ref_ignored($g, $p);
            next if $exists->{$pathname};
            next if ($self->check_path($pathname, $r) !=
                     $SVN::Node::dir);
            $exists->{$pathname} = Git::SVN->init(
                                       $self->url, $pathname, undef,
                                       $g->{ref}->full_path($p), 1);
        }
        # One of the parent directories of the glob's left-hand side
        # was added or replaced
        my $c = '';
        foreach (split m#/#, $g->{path}->{left}) {
            $c .= "/$_";
            next unless ($paths->{$c} &&
                         ($paths->{$c}->{action} =~ /^[AR]$/));
            get_dir_check($self, $exists, $g, $r);
        }
    }
    return values %$exists;
}

# Starting from the repository root, append one component of svn_path
# at a time until a URL is found for which connecting and fetching one
# log entry succeeds.  Returns that URL, canonicalized.
sub minimize_url {
    my ($self) = @_;
    return $self->url if ($self->url eq $self->{repos_root});
    my $url = $self->{repos_root};
    my @components = split(m!/!, $self->{svn_path});
    my $c = '';
    do {
        $url = add_path_to_url($url, $c);
        eval {
            my $ra = (ref $self)->new($url);
            my $latest = $ra->get_latest_revnum;
            $ra->get_log("", $latest, 0, 1, 0, 1, sub {});
        };
    } while ($@ && defined($c = shift @components));

    return canonicalize_url($url);
}

# Probe once whether do_switch() can be called on this connection and
# remember the answer in the file-scoped $can_do_switch.
sub can_do_switch {
    my $self = shift;
    unless (defined $can_do_switch) {
        my $pool = SVN::Pool->new;
        my $rep = eval {
            $self->do_switch(1, '', 0, $self->url,
                             SVN::Delta::Editor->new, $pool);
        };
        if ($@) {
            $can_do_switch = 0;
        } else {
            $rep->abort_report($pool);
            $can_do_switch = 1;
        }
        $pool->clear;
    }
    return $can_do_switch;
}

# SVN error handler used while scanning history.
#
# The three error codes listed below are treated as "path does not
# exist"; each distinct message is warned about once and then ignored.
# Any other error is fatal.
sub skip_unknown_revs {
    my ($err) = @_;
    my $errno = $err->apr_err();
    # Maybe the branch we're tracking didn't
    # exist when the repo started, so it's
    # not an error if it doesn't, just continue
    #
    # Wonderfully consistent library, eh?
    # 160013 - svn:// and file://
    # 175002 - http(s)://
    # 175007 - http(s):// (this repo required authorization, too...)
    #   More codes may be discovered later...
    if ($errno == 175007 || $errno == 175002 || $errno == 160013) {
        my $err_key = $err->expanded_message;
        # revision numbers change every time, filter them out
        $err_key =~ s/\d+/\0/g;
        $err_key = "$errno\0$err_key";
        unless ($ignored_err{$err_key}) {
            warn "W: Ignoring error from SVN, path probably ",
                 "does not exist: ($errno): ",
                 $err->expanded_message,"\n";
            warn "W: Do not be alarmed at the above message ",
                 "git-svn is just searching aggressively for ",
                 "old history.\n",
                 "This may take a while on large repositories\n";
            $ignored_err{$err_key} = 1;
        }
        return;
    }
    die "Error from SVN, ($errno): ", $err->expanded_message,"\n";
}

1;
__END__

=head1 NAME

Git::SVN::Ra - Subversion remote access functions for git-svn

=head1 SYNOPSIS

    use Git::SVN::Ra;

    my $ra = Git::SVN::Ra->new($branchurl);
    my ($dirents, $fetched_revnum, $props) =
        $ra->get_dir('.', $SVN::Core::INVALID_REVNUM);

=head1 DESCRIPTION

This is a wrapper around the L<SVN::Ra> module for use by B<git-svn>.
It fills in some default parameters (such as the authentication
scheme), smooths over incompatibilities between libsvn versions, adds
caching, and implements some functions specific to B<git-svn>.

Do not use it unless you are developing git-svn.  The interface will
change as git-svn evolves.

=head1 DEPENDENCIES

Subversion perl bindings,
L<Git::SVN>.

C<Git::SVN::Ra> has not been tested using callers other than
B<git-svn> itself.

=head1 SEE ALSO

L<SVN::Ra>.

=head1 INCOMPATIBILITIES

None reported.

=head1 BUGS

None.