summaryrefslogtreecommitdiff
path: root/perl/Git/SVN
diff options
context:
space:
mode:
Diffstat (limited to 'perl/Git/SVN')
-rw-r--r--perl/Git/SVN/Editor.pm607
-rw-r--r--perl/Git/SVN/Fetcher.pm627
-rw-r--r--perl/Git/SVN/GlobSpec.pm65
-rw-r--r--perl/Git/SVN/Log.pm400
-rw-r--r--perl/Git/SVN/Memoize/YAML.pm93
-rw-r--r--perl/Git/SVN/Migration.pm258
-rw-r--r--perl/Git/SVN/Prompt.pm184
-rw-r--r--perl/Git/SVN/Ra.pm708
-rw-r--r--perl/Git/SVN/Utils.pm232
9 files changed, 3174 insertions, 0 deletions
diff --git a/perl/Git/SVN/Editor.pm b/perl/Git/SVN/Editor.pm
new file mode 100644
index 0000000000..4c4199afec
--- /dev/null
+++ b/perl/Git/SVN/Editor.pm
@@ -0,0 +1,607 @@
+package Git::SVN::Editor;
+use vars qw/@ISA $_rmdir $_cp_similarity $_find_copies_harder $_rename_limit/;
+use strict;
+use warnings;
+use SVN::Core;
+use SVN::Delta;
+use Carp qw/croak/;
+use Git qw/command command_oneline command_noisy command_output_pipe
+ command_input_pipe command_close_pipe
+ command_bidi_pipe command_close_bidi_pipe/;
+BEGIN {
+ @ISA = qw(SVN::Delta::Editor);
+}
+
+sub new {
+ my ($class, $opts) = @_;
+ foreach (qw/svn_path r ra tree_a tree_b log editor_cb/) {
+ die "$_ required!\n" unless (defined $opts->{$_});
+ }
+
+ my $pool = SVN::Pool->new;
+ my $mods = generate_diff($opts->{tree_a}, $opts->{tree_b});
+ my $types = check_diff_paths($opts->{ra}, $opts->{svn_path},
+ $opts->{r}, $mods);
+
+ # $opts->{ra} functions should not be used after this:
+ my @ce = $opts->{ra}->get_commit_editor($opts->{log},
+ $opts->{editor_cb}, $pool);
+ my $self = SVN::Delta::Editor->new(@ce, $pool);
+ bless $self, $class;
+ foreach (qw/svn_path r tree_a tree_b/) {
+ $self->{$_} = $opts->{$_};
+ }
+ $self->{url} = $opts->{ra}->{url};
+ $self->{mods} = $mods;
+ $self->{types} = $types;
+ $self->{pool} = $pool;
+ $self->{bat} = { '' => $self->open_root($self->{r}, $self->{pool}) };
+ $self->{rm} = { };
+ $self->{path_prefix} = length $self->{svn_path} ?
+ "$self->{svn_path}/" : '';
+ $self->{config} = $opts->{config};
+ $self->{mergeinfo} = $opts->{mergeinfo};
+ $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
+ return $self;
+}
+
+sub generate_diff {
+ my ($tree_a, $tree_b) = @_;
+ my @diff_tree = qw(diff-tree -z -r);
+ if ($_cp_similarity) {
+ push @diff_tree, "-C$_cp_similarity";
+ } else {
+ push @diff_tree, '-C';
+ }
+ push @diff_tree, '--find-copies-harder' if $_find_copies_harder;
+ push @diff_tree, "-l$_rename_limit" if defined $_rename_limit;
+ push @diff_tree, $tree_a, $tree_b;
+ my ($diff_fh, $ctx) = command_output_pipe(@diff_tree);
+ local $/ = "\0";
+ my $state = 'meta';
+ my @mods;
+ while (<$diff_fh>) {
+ chomp $_; # this gets rid of the trailing "\0"
+ if ($state eq 'meta' && /^:(\d{6})\s(\d{6})\s
+ ($::sha1)\s($::sha1)\s
+ ([MTCRAD])\d*$/xo) {
+ push @mods, { mode_a => $1, mode_b => $2,
+ sha1_a => $3, sha1_b => $4,
+ chg => $5 };
+ if ($5 =~ /^(?:C|R)$/) {
+ $state = 'file_a';
+ } else {
+ $state = 'file_b';
+ }
+ } elsif ($state eq 'file_a') {
+ my $x = $mods[$#mods] or croak "Empty array\n";
+ if ($x->{chg} !~ /^(?:C|R)$/) {
+ croak "Error parsing $_, $x->{chg}\n";
+ }
+ $x->{file_a} = $_;
+ $state = 'file_b';
+ } elsif ($state eq 'file_b') {
+ my $x = $mods[$#mods] or croak "Empty array\n";
+ if (exists $x->{file_a} && $x->{chg} !~ /^(?:C|R)$/) {
+ croak "Error parsing $_, $x->{chg}\n";
+ }
+ if (!exists $x->{file_a} && $x->{chg} =~ /^(?:C|R)$/) {
+ croak "Error parsing $_, $x->{chg}\n";
+ }
+ $x->{file_b} = $_;
+ $state = 'meta';
+ } else {
+ croak "Error parsing $_\n";
+ }
+ }
+ command_close_pipe($diff_fh, $ctx);
+ \@mods;
+}
+
+sub check_diff_paths {
+ my ($ra, $pfx, $rev, $mods) = @_;
+ my %types;
+ $pfx .= '/' if length $pfx;
+
+ sub type_diff_paths {
+ my ($ra, $types, $path, $rev) = @_;
+ my @p = split m#/+#, $path;
+ my $c = shift @p;
+ unless (defined $types->{$c}) {
+ $types->{$c} = $ra->check_path($c, $rev);
+ }
+ while (@p) {
+ $c .= '/' . shift @p;
+ next if defined $types->{$c};
+ $types->{$c} = $ra->check_path($c, $rev);
+ }
+ }
+
+ foreach my $m (@$mods) {
+ foreach my $f (qw/file_a file_b/) {
+ next unless defined $m->{$f};
+ my ($dir) = ($m->{$f} =~ m#^(.*?)/?(?:[^/]+)$#);
+ if (length $pfx.$dir && ! defined $types{$dir}) {
+ type_diff_paths($ra, \%types, $pfx.$dir, $rev);
+ }
+ }
+ }
+ \%types;
+}
+
+sub split_path {
+ return ($_[0] =~ m#^(.*?)/?([^/]+)$#);
+}
+
+sub repo_path {
+ my ($self, $path) = @_;
+ if (my $enc = $self->{pathnameencoding}) {
+ require Encode;
+ Encode::from_to($path, $enc, 'UTF-8');
+ }
+ $self->{path_prefix}.(defined $path ? $path : '');
+}
+
+sub url_path {
+ my ($self, $path) = @_;
+ $path = $self->repo_path($path);
+ if ($self->{url} =~ m#^https?://#) {
+ # characters are taken from subversion/libsvn_subr/path.c
+ $path =~ s#([^~a-zA-Z0-9_./!$&'()*+,-])#sprintf("%%%02X",ord($1))#eg;
+ }
+ $self->{url} . '/' . $path;
+}
+
+sub rmdirs {
+ my ($self) = @_;
+ my $rm = $self->{rm};
+ delete $rm->{''}; # we never delete the url we're tracking
+ return unless %$rm;
+
+ foreach (keys %$rm) {
+ my @d = split m#/#, $_;
+ my $c = shift @d;
+ $rm->{$c} = 1;
+ while (@d) {
+ $c .= '/' . shift @d;
+ $rm->{$c} = 1;
+ }
+ }
+ delete $rm->{$self->{svn_path}};
+ delete $rm->{''}; # we never delete the url we're tracking
+ return unless %$rm;
+
+ my ($fh, $ctx) = command_output_pipe(qw/ls-tree --name-only -r -z/,
+ $self->{tree_b});
+ local $/ = "\0";
+ while (<$fh>) {
+ chomp;
+ my @dn = split m#/#, $_;
+ while (pop @dn) {
+ delete $rm->{join '/', @dn};
+ }
+ unless (%$rm) {
+ close $fh;
+ return;
+ }
+ }
+ command_close_pipe($fh, $ctx);
+
+ my ($r, $p, $bat) = ($self->{r}, $self->{pool}, $self->{bat});
+ foreach my $d (sort { $b =~ tr#/#/# <=> $a =~ tr#/#/# } keys %$rm) {
+ $self->close_directory($bat->{$d}, $p);
+ my ($dn) = ($d =~ m#^(.*?)/?(?:[^/]+)$#);
+ print "\tD+\t$d/\n" unless $::_q;
+ $self->SUPER::delete_entry($d, $r, $bat->{$dn}, $p);
+ delete $bat->{$d};
+ }
+}
+
+sub open_or_add_dir {
+ my ($self, $full_path, $baton, $deletions) = @_;
+ my $t = $self->{types}->{$full_path};
+ if (!defined $t) {
+ die "$full_path not known in r$self->{r} or we have a bug!\n";
+ }
+ {
+ no warnings 'once';
+ # SVN::Node::none and SVN::Node::file are used only once,
+ # so we're shutting up Perl's warnings about them.
+ if ($t == $SVN::Node::none || defined($deletions->{$full_path})) {
+ return $self->add_directory($full_path, $baton,
+ undef, -1, $self->{pool});
+ } elsif ($t == $SVN::Node::dir) {
+ return $self->open_directory($full_path, $baton,
+ $self->{r}, $self->{pool});
+ } # no warnings 'once'
+ print STDERR "$full_path already exists in repository at ",
+ "r$self->{r} and it is not a directory (",
+ ($t == $SVN::Node::file ? 'file' : 'unknown'),"/$t)\n";
+ } # no warnings 'once'
+ exit 1;
+}
+
+sub ensure_path {
+ my ($self, $path, $deletions) = @_;
+ my $bat = $self->{bat};
+ my $repo_path = $self->repo_path($path);
+ return $bat->{''} unless (length $repo_path);
+
+ my @p = split m#/+#, $repo_path;
+ my $c = shift @p;
+ $bat->{$c} ||= $self->open_or_add_dir($c, $bat->{''}, $deletions);
+ while (@p) {
+ my $c0 = $c;
+ $c .= '/' . shift @p;
+ $bat->{$c} ||= $self->open_or_add_dir($c, $bat->{$c0}, $deletions);
+ }
+ return $bat->{$c};
+}
+
+# Subroutine to convert a globbing pattern to a regular expression.
+# From perl cookbook.
+sub glob2pat {
+ my $globstr = shift;
+ my %patmap = ('*' => '.*', '?' => '.', '[' => '[', ']' => ']');
+ $globstr =~ s{(.)} { $patmap{$1} || "\Q$1" }ge;
+ return '^' . $globstr . '$';
+}
+
+sub check_autoprop {
+ my ($self, $pattern, $properties, $file, $fbat) = @_;
+ # Convert the globbing pattern to a regular expression.
+ my $regex = glob2pat($pattern);
+ # Check if the pattern matches the file name.
+ if($file =~ m/($regex)/) {
+ # Parse the list of properties to set.
+ my @props = split(/;/, $properties);
+ foreach my $prop (@props) {
+ # Parse 'name=value' syntax and set the property.
+ if ($prop =~ /([^=]+)=(.*)/) {
+ my ($n,$v) = ($1,$2);
+ for ($n, $v) {
+ s/^\s+//; s/\s+$//;
+ }
+ $self->change_file_prop($fbat, $n, $v);
+ }
+ }
+ }
+}
+
+sub apply_autoprops {
+ my ($self, $file, $fbat) = @_;
+ my $conf_t = ${$self->{config}}{'config'};
+ no warnings 'once';
+ # Check [miscellany]/enable-auto-props in svn configuration.
+ if (SVN::_Core::svn_config_get_bool(
+ $conf_t,
+ $SVN::_Core::SVN_CONFIG_SECTION_MISCELLANY,
+ $SVN::_Core::SVN_CONFIG_OPTION_ENABLE_AUTO_PROPS,
+ 0)) {
+ # Auto-props are enabled. Enumerate them to look for matches.
+ my $callback = sub {
+ $self->check_autoprop($_[0], $_[1], $file, $fbat);
+ };
+ SVN::_Core::svn_config_enumerate(
+ $conf_t,
+ $SVN::_Core::SVN_CONFIG_SECTION_AUTO_PROPS,
+ $callback);
+ }
+}
+
+sub check_attr {
+ my ($attr,$path) = @_;
+ my $val = command_oneline("check-attr", $attr, "--", $path);
+ if ($val) { $val =~ s/^[^:]*:\s*[^:]*:\s*(.*)\s*$/$1/; }
+ return $val;
+}
+
+sub apply_manualprops {
+ my ($self, $file, $fbat) = @_;
+ my $pending_properties = check_attr( "svn-properties", $file );
+ if ($pending_properties eq "") { return; }
+ # Parse the list of properties to set.
+ my @props = split(/;/, $pending_properties);
+ # TODO: get existing properties to compare to
+ # - this fails for add so currently not done
+ # my $existing_props = ::get_svnprops($file);
+ my $existing_props = {};
+ # TODO: caching svn properties or storing them in .gitattributes
+ # would make that faster
+ foreach my $prop (@props) {
+ # Parse 'name=value' syntax and set the property.
+ if ($prop =~ /([^=]+)=(.*)/) {
+ my ($n,$v) = ($1,$2);
+ for ($n, $v) {
+ s/^\s+//; s/\s+$//;
+ }
+ my $existing = $existing_props->{$n};
+ if (!defined($existing) || $existing ne $v) {
+ $self->change_file_prop($fbat, $n, $v);
+ }
+ }
+ }
+}
+
+sub A {
+ my ($self, $m, $deletions) = @_;
+ my ($dir, $file) = split_path($m->{file_b});
+ my $pbat = $self->ensure_path($dir, $deletions);
+ my $fbat = $self->add_file($self->repo_path($m->{file_b}), $pbat,
+ undef, -1);
+ print "\tA\t$m->{file_b}\n" unless $::_q;
+ $self->apply_autoprops($file, $fbat);
+ $self->apply_manualprops($m->{file_b}, $fbat);
+ $self->chg_file($fbat, $m);
+ $self->close_file($fbat,undef,$self->{pool});
+}
+
+sub C {
+ my ($self, $m, $deletions) = @_;
+ my ($dir, $file) = split_path($m->{file_b});
+ my $pbat = $self->ensure_path($dir, $deletions);
+ # workaround for a bug in svn serf backend (v1.8.5 and below):
+ # store third argument to ->add_file() in a local variable, to make it
+ # have the same lifetime as $fbat
+ my $upa = $self->url_path($m->{file_a});
+ my $fbat = $self->add_file($self->repo_path($m->{file_b}), $pbat,
+ $upa, $self->{r});
+ print "\tC\t$m->{file_a} => $m->{file_b}\n" unless $::_q;
+ $self->apply_manualprops($m->{file_b}, $fbat);
+ $self->chg_file($fbat, $m);
+ $self->close_file($fbat,undef,$self->{pool});
+}
+
+sub delete_entry {
+ my ($self, $path, $pbat) = @_;
+ my $rpath = $self->repo_path($path);
+ my ($dir, $file) = split_path($rpath);
+ $self->{rm}->{$dir} = 1;
+ $self->SUPER::delete_entry($rpath, $self->{r}, $pbat, $self->{pool});
+}
+
+sub R {
+ my ($self, $m, $deletions) = @_;
+ my ($dir, $file) = split_path($m->{file_b});
+ my $pbat = $self->ensure_path($dir, $deletions);
+ # workaround for a bug in svn serf backend, see comment in C() above
+ my $upa = $self->url_path($m->{file_a});
+ my $fbat = $self->add_file($self->repo_path($m->{file_b}), $pbat,
+ $upa, $self->{r});
+ print "\tR\t$m->{file_a} => $m->{file_b}\n" unless $::_q;
+ $self->apply_autoprops($file, $fbat);
+ $self->apply_manualprops($m->{file_b}, $fbat);
+ $self->chg_file($fbat, $m);
+ $self->close_file($fbat,undef,$self->{pool});
+
+ ($dir, $file) = split_path($m->{file_a});
+ $pbat = $self->ensure_path($dir, $deletions);
+ $self->delete_entry($m->{file_a}, $pbat);
+}
+
+sub M {
+ my ($self, $m, $deletions) = @_;
+ my ($dir, $file) = split_path($m->{file_b});
+ my $pbat = $self->ensure_path($dir, $deletions);
+ my $fbat = $self->open_file($self->repo_path($m->{file_b}),
+ $pbat,$self->{r},$self->{pool});
+ print "\t$m->{chg}\t$m->{file_b}\n" unless $::_q;
+ $self->apply_manualprops($m->{file_b}, $fbat);
+ $self->chg_file($fbat, $m);
+ $self->close_file($fbat,undef,$self->{pool});
+}
+
+sub T {
+ my ($self, $m, $deletions) = @_;
+
+ # Work around subversion issue 4091: toggling the "is a
+ # symlink" property requires removing and re-adding a
+ # file or else "svn up" on affected clients trips an
+ # assertion and aborts.
+ if (($m->{mode_b} =~ /^120/ && $m->{mode_a} !~ /^120/) ||
+ ($m->{mode_b} !~ /^120/ && $m->{mode_a} =~ /^120/)) {
+ $self->D({
+ mode_a => $m->{mode_a}, mode_b => '000000',
+ sha1_a => $m->{sha1_a}, sha1_b => '0' x 40,
+ chg => 'D', file_b => $m->{file_b}
+ }, $deletions);
+ $self->A({
+ mode_a => '000000', mode_b => $m->{mode_b},
+ sha1_a => '0' x 40, sha1_b => $m->{sha1_b},
+ chg => 'A', file_b => $m->{file_b}
+ }, $deletions);
+ return;
+ }
+
+ $self->M($m, $deletions);
+}
+
+sub change_file_prop {
+ my ($self, $fbat, $pname, $pval) = @_;
+ $self->SUPER::change_file_prop($fbat, $pname, $pval, $self->{pool});
+}
+
+sub change_dir_prop {
+ my ($self, $pbat, $pname, $pval) = @_;
+ $self->SUPER::change_dir_prop($pbat, $pname, $pval, $self->{pool});
+}
+
+sub _chg_file_get_blob ($$$$) {
+ my ($self, $fbat, $m, $which) = @_;
+ my $fh = $::_repository->temp_acquire("git_blob_$which");
+ if ($m->{"mode_$which"} =~ /^120/) {
+ print $fh 'link ' or croak $!;
+ $self->change_file_prop($fbat,'svn:special','*');
+ } elsif ($m->{mode_a} =~ /^120/ && $m->{"mode_$which"} !~ /^120/) {
+ $self->change_file_prop($fbat,'svn:special',undef);
+ }
+ my $blob = $m->{"sha1_$which"};
+ return ($fh,) if ($blob =~ /^0{40}$/);
+ my $size = $::_repository->cat_blob($blob, $fh);
+ croak "Failed to read object $blob" if ($size < 0);
+ $fh->flush == 0 or croak $!;
+ seek $fh, 0, 0 or croak $!;
+
+ my $exp = ::md5sum($fh);
+ seek $fh, 0, 0 or croak $!;
+ return ($fh, $exp);
+}
+
+sub chg_file {
+ my ($self, $fbat, $m) = @_;
+ if ($m->{mode_b} =~ /755$/ && $m->{mode_a} !~ /755$/) {
+ $self->change_file_prop($fbat,'svn:executable','*');
+ } elsif ($m->{mode_b} !~ /755$/ && $m->{mode_a} =~ /755$/) {
+ $self->change_file_prop($fbat,'svn:executable',undef);
+ }
+ my ($fh_a, $exp_a) = _chg_file_get_blob $self, $fbat, $m, 'a';
+ my ($fh_b, $exp_b) = _chg_file_get_blob $self, $fbat, $m, 'b';
+ my $pool = SVN::Pool->new;
+ my $atd = $self->apply_textdelta($fbat, $exp_a, $pool);
+ if (-s $fh_a) {
+ my $txstream = SVN::TxDelta::new ($fh_a, $fh_b, $pool);
+ my $res = SVN::TxDelta::send_txstream($txstream, @$atd, $pool);
+ if (defined $res) {
+ die "Unexpected result from send_txstream: $res\n",
+ "(SVN::Core::VERSION: $SVN::Core::VERSION)\n";
+ }
+ } else {
+ my $got = SVN::TxDelta::send_stream($fh_b, @$atd, $pool);
+ die "Checksum mismatch\nexpected: $exp_b\ngot: $got\n"
+ if ($got ne $exp_b);
+ }
+ Git::temp_release($fh_b, 1);
+ Git::temp_release($fh_a, 1);
+ $pool->clear;
+}
+
+sub D {
+ my ($self, $m, $deletions) = @_;
+ my ($dir, $file) = split_path($m->{file_b});
+ my $pbat = $self->ensure_path($dir, $deletions);
+ print "\tD\t$m->{file_b}\n" unless $::_q;
+ $self->delete_entry($m->{file_b}, $pbat);
+}
+
+sub close_edit {
+ my ($self) = @_;
+ my ($p,$bat) = ($self->{pool}, $self->{bat});
+ foreach (sort { $b =~ tr#/#/# <=> $a =~ tr#/#/# } keys %$bat) {
+ next if $_ eq '';
+ $self->close_directory($bat->{$_}, $p);
+ }
+ $self->close_directory($bat->{''}, $p);
+ $self->SUPER::close_edit($p);
+ $p->clear;
+}
+
+sub abort_edit {
+ my ($self) = @_;
+ $self->SUPER::abort_edit($self->{pool});
+}
+
+sub DESTROY {
+ my $self = shift;
+ $self->SUPER::DESTROY(@_);
+ $self->{pool}->clear;
+}
+
+# this drives the editor
+sub apply_diff {
+ my ($self) = @_;
+ my $mods = $self->{mods};
+ my %o = ( D => 0, C => 1, R => 2, A => 3, M => 4, T => 5 );
+ my %deletions;
+
+ foreach my $m (@$mods) {
+ if ($m->{chg} eq "D") {
+ $deletions{$m->{file_b}} = 1;
+ }
+ }
+
+ foreach my $m (sort { $o{$a->{chg}} <=> $o{$b->{chg}} } @$mods) {
+ my $f = $m->{chg};
+ if (defined $o{$f}) {
+ $self->$f($m, \%deletions);
+ } else {
+ fatal("Invalid change type: $f");
+ }
+ }
+
+ if (defined($self->{mergeinfo})) {
+ $self->change_dir_prop($self->{bat}{''}, "svn:mergeinfo",
+ $self->{mergeinfo});
+ }
+ $self->rmdirs if $_rmdir;
+ if (@$mods == 0 && !defined($self->{mergeinfo})) {
+ $self->abort_edit;
+ } else {
+ $self->close_edit;
+ }
+ return scalar @$mods;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Git::SVN::Editor - commit driver for "git svn set-tree" and dcommit
+
+=head1 SYNOPSIS
+
+ use Git::SVN::Editor;
+ use Git::SVN::Ra;
+
+ my $ra = Git::SVN::Ra->new($url);
+ my %opts = (
+ r => 19,
+ log => "log message",
+ ra => $ra,
+ config => SVN::Core::config_get_config($svn_config_dir),
+ tree_a => "$commit^",
+ tree_b => "$commit",
+ editor_cb => sub { print "Committed r$_[0]\n"; },
+ mergeinfo => "/branches/foo:1-10",
+ svn_path => "trunk"
+ );
+ Git::SVN::Editor->new(\%opts)->apply_diff or print "No changes\n";
+
+ my $re = Git::SVN::Editor::glob2pat("trunk/*");
+ if ($branchname =~ /$re/) {
+ print "matched!\n";
+ }
+
+=head1 DESCRIPTION
+
+This module is an implementation detail of the "git svn" command.
+Do not use it unless you are developing git-svn.
+
+This module adapts the C<SVN::Delta::Editor> object returned by
+C<SVN::Delta::get_commit_editor> and drives it to convey the
+difference between two git tree objects to a remote Subversion
+repository.
+
+The interface will change as git-svn evolves.
+
+=head1 DEPENDENCIES
+
+Subversion perl bindings,
+the core L<Carp> module,
+and git's L<Git> helper module.
+
+C<Git::SVN::Editor> has not been tested using callers other than
+B<git-svn> itself.
+
+=head1 SEE ALSO
+
+L<SVN::Delta>,
+L<Git::SVN::Fetcher>.
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+=head1 BUGS
+
+None.
diff --git a/perl/Git/SVN/Fetcher.pm b/perl/Git/SVN/Fetcher.pm
new file mode 100644
index 0000000000..d8c21ad915
--- /dev/null
+++ b/perl/Git/SVN/Fetcher.pm
@@ -0,0 +1,627 @@
+package Git::SVN::Fetcher;
+use vars qw/@ISA $_ignore_regex $_include_regex $_preserve_empty_dirs
+ $_placeholder_filename @deleted_gpath %added_placeholder
+ $repo_id/;
+use strict;
+use warnings;
+use SVN::Delta;
+use Carp qw/croak/;
+use File::Basename qw/dirname/;
+use Git qw/command command_oneline command_noisy command_output_pipe
+ command_input_pipe command_close_pipe
+ command_bidi_pipe command_close_bidi_pipe/;
+BEGIN {
+ @ISA = qw(SVN::Delta::Editor);
+}
+
+# file baton members: path, mode_a, mode_b, pool, fh, blob, base
+sub new {
+ my ($class, $git_svn, $switch_path) = @_;
+ my $self = SVN::Delta::Editor->new;
+ bless $self, $class;
+ if (exists $git_svn->{last_commit}) {
+ $self->{c} = $git_svn->{last_commit};
+ $self->{empty_symlinks} =
+ _mark_empty_symlinks($git_svn, $switch_path);
+ }
+
+ # some options are read globally, but can be overridden locally
+ # per [svn-remote "..."] section. Command-line options will *NOT*
+ # override options set in an [svn-remote "..."] section
+ $repo_id = $git_svn->{repo_id};
+ my $k = "svn-remote.$repo_id.ignore-paths";
+ my $v = eval { command_oneline('config', '--get', $k) };
+ $self->{ignore_regex} = $v;
+
+ $k = "svn-remote.$repo_id.include-paths";
+ $v = eval { command_oneline('config', '--get', $k) };
+ $self->{include_regex} = $v;
+
+ $k = "svn-remote.$repo_id.preserve-empty-dirs";
+ $v = eval { command_oneline('config', '--get', '--bool', $k) };
+ if ($v && $v eq 'true') {
+ $_preserve_empty_dirs = 1;
+ $k = "svn-remote.$repo_id.placeholder-filename";
+ $v = eval { command_oneline('config', '--get', $k) };
+ $_placeholder_filename = $v;
+ }
+
+ # Load the list of placeholder files added during previous invocations.
+ $k = "svn-remote.$repo_id.added-placeholder";
+ $v = eval { command_oneline('config', '--get-all', $k) };
+ if ($_preserve_empty_dirs && $v) {
+ # command() prints errors to stderr, so we only call it if
+ # command_oneline() succeeded.
+ my @v = command('config', '--get-all', $k);
+ $added_placeholder{ dirname($_) } = $_ foreach @v;
+ }
+
+ $self->{empty} = {};
+ $self->{dir_prop} = {};
+ $self->{file_prop} = {};
+ $self->{absent_dir} = {};
+ $self->{absent_file} = {};
+ require Git::IndexInfo;
+ $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
+ $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
+ $self;
+}
+
+# this uses the Ra object, so it must be called before do_{switch,update},
+# not inside them (when the Git::SVN::Fetcher object is passed) to
+# do_{switch,update}
+sub _mark_empty_symlinks {
+ my ($git_svn, $switch_path) = @_;
+ my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
+ return {} if (!defined($bool)) || (defined($bool) && ! $bool);
+
+ my %ret;
+ my ($rev, $cmt) = $git_svn->last_rev_commit;
+ return {} unless ($rev && $cmt);
+
+ # allow the warning to be printed for each revision we fetch to
+ # ensure the user sees it. The user can also disable the workaround
+ # on the repository even while git svn is running and the next
+ # revision fetched will skip this expensive function.
+ my $printed_warning;
+ chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
+ my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
+ local $/ = "\0";
+ my $pfx = defined($switch_path) ? $switch_path : $git_svn->path;
+ $pfx .= '/' if length($pfx);
+ while (<$ls>) {
+ chomp;
+ s/\A100644 blob $empty_blob\t//o or next;
+ unless ($printed_warning) {
+ print STDERR "Scanning for empty symlinks, ",
+ "this may take a while if you have ",
+ "many empty files\n",
+ "You may disable this with `",
+ "git config svn.brokenSymlinkWorkaround ",
+ "false'.\n",
+ "This may be done in a different ",
+ "terminal without restarting ",
+ "git svn\n";
+ $printed_warning = 1;
+ }
+ my $path = $_;
+ my (undef, $props) =
+ $git_svn->ra->get_file($pfx.$path, $rev, undef);
+ if ($props->{'svn:special'}) {
+ $ret{$path} = 1;
+ }
+ }
+ command_close_pipe($ls, $ctx);
+ \%ret;
+}
+
+# returns true if a given path is inside a ".git" directory
+sub in_dot_git {
+ $_[0] =~ m{(?:^|/)\.git(?:/|$)};
+}
+
+# return value: 0 -- don't ignore, 1 -- ignore
+# This will also check whether the path is explicitly included
+sub is_path_ignored {
+ my ($self, $path) = @_;
+ return 1 if in_dot_git($path);
+ return 1 if defined($self->{ignore_regex}) &&
+ $path =~ m!$self->{ignore_regex}!;
+ return 0 if defined($self->{include_regex}) &&
+ $path =~ m!$self->{include_regex}!;
+ return 0 if defined($_include_regex) &&
+ $path =~ m!$_include_regex!;
+ return 1 if defined($self->{include_regex});
+ return 1 if defined($_include_regex);
+ return 0 unless defined($_ignore_regex);
+ return 1 if $path =~ m!$_ignore_regex!o;
+ return 0;
+}
+
+sub set_path_strip {
+ my ($self, $path) = @_;
+ $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
+}
+
+sub open_root {
+ { path => '' };
+}
+
+sub open_directory {
+ my ($self, $path, $pb, $rev) = @_;
+ { path => $path };
+}
+
+sub git_path {
+ my ($self, $path) = @_;
+ if (my $enc = $self->{pathnameencoding}) {
+ require Encode;
+ Encode::from_to($path, 'UTF-8', $enc);
+ }
+ if ($self->{path_strip}) {
+ $path =~ s!$self->{path_strip}!! or
+ die "Failed to strip path '$path' ($self->{path_strip})\n";
+ }
+ $path;
+}
+
+sub delete_entry {
+ my ($self, $path, $rev, $pb) = @_;
+ return undef if $self->is_path_ignored($path);
+
+ my $gpath = $self->git_path($path);
+ return undef if ($gpath eq '');
+
+ # remove entire directories.
+ my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
+ =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
+ if ($tree) {
+ my ($ls, $ctx) = command_output_pipe(qw/ls-tree
+ -r --name-only -z/,
+ $tree);
+ local $/ = "\0";
+ while (<$ls>) {
+ chomp;
+ my $rmpath = "$gpath/$_";
+ $self->{gii}->remove($rmpath);
+ print "\tD\t$rmpath\n" unless $::_q;
+ }
+ print "\tD\t$gpath/\n" unless $::_q;
+ command_close_pipe($ls, $ctx);
+ } else {
+ $self->{gii}->remove($gpath);
+ print "\tD\t$gpath\n" unless $::_q;
+ }
+ # Don't add to @deleted_gpath if we're deleting a placeholder file.
+ push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
+ $self->{empty}->{$path} = 0;
+ undef;
+}
+
+sub open_file {
+ my ($self, $path, $pb, $rev) = @_;
+ my ($mode, $blob);
+
+ goto out if $self->is_path_ignored($path);
+
+ my $gpath = $self->git_path($path);
+ ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
+ =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
+ unless (defined $mode && defined $blob) {
+ die "$path was not found in commit $self->{c} (r$rev)\n";
+ }
+ if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
+ $mode = '120000';
+ }
+out:
+ { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
+ pool => SVN::Pool->new, action => 'M' };
+}
+
+sub add_file {
+ my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
+ my $mode;
+
+ if (!$self->is_path_ignored($path)) {
+ my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
+ delete $self->{empty}->{$dir};
+ $mode = '100644';
+
+ if ($added_placeholder{$dir}) {
+ # Remove our placeholder file, if we created one.
+ delete_entry($self, $added_placeholder{$dir})
+ unless $path eq $added_placeholder{$dir};
+ delete $added_placeholder{$dir}
+ }
+ }
+
+ { path => $path, mode_a => $mode, mode_b => $mode,
+ pool => SVN::Pool->new, action => 'A' };
+}
+
+sub add_directory {
+ my ($self, $path, $cp_path, $cp_rev) = @_;
+ goto out if $self->is_path_ignored($path);
+ my $gpath = $self->git_path($path);
+ if ($gpath eq '') {
+ my ($ls, $ctx) = command_output_pipe(qw/ls-tree
+ -r --name-only -z/,
+ $self->{c});
+ local $/ = "\0";
+ while (<$ls>) {
+ chomp;
+ $self->{gii}->remove($_);
+ print "\tD\t$_\n" unless $::_q;
+ push @deleted_gpath, $gpath;
+ }
+ command_close_pipe($ls, $ctx);
+ $self->{empty}->{$path} = 0;
+ }
+ my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
+ delete $self->{empty}->{$dir};
+ $self->{empty}->{$path} = 1;
+
+ if ($added_placeholder{$dir}) {
+ # Remove our placeholder file, if we created one.
+ delete_entry($self, $added_placeholder{$dir});
+ delete $added_placeholder{$dir}
+ }
+
+out:
+ { path => $path };
+}
+
+sub change_dir_prop {
+ my ($self, $db, $prop, $value) = @_;
+ return undef if $self->is_path_ignored($db->{path});
+ $self->{dir_prop}->{$db->{path}} ||= {};
+ $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
+ undef;
+}
+
+sub absent_directory {
+ my ($self, $path, $pb) = @_;
+ return undef if $self->is_path_ignored($path);
+ $self->{absent_dir}->{$pb->{path}} ||= [];
+ push @{$self->{absent_dir}->{$pb->{path}}}, $path;
+ undef;
+}
+
+sub absent_file {
+ my ($self, $path, $pb) = @_;
+ return undef if $self->is_path_ignored($path);
+ $self->{absent_file}->{$pb->{path}} ||= [];
+ push @{$self->{absent_file}->{$pb->{path}}}, $path;
+ undef;
+}
+
+sub change_file_prop {
+ my ($self, $fb, $prop, $value) = @_;
+ return undef if $self->is_path_ignored($fb->{path});
+ if ($prop eq 'svn:executable') {
+ if ($fb->{mode_b} != 120000) {
+ $fb->{mode_b} = defined $value ? 100755 : 100644;
+ }
+ } elsif ($prop eq 'svn:special') {
+ $fb->{mode_b} = defined $value ? 120000 : 100644;
+ } else {
+ $self->{file_prop}->{$fb->{path}} ||= {};
+ $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
+ }
+ undef;
+}
+
+sub apply_textdelta {
+ my ($self, $fb, $exp) = @_;
+ return undef if $self->is_path_ignored($fb->{path});
+ my $suffix = 0;
+ ++$suffix while $::_repository->temp_is_locked("svn_delta_${$}_$suffix");
+ my $fh = $::_repository->temp_acquire("svn_delta_${$}_$suffix");
+ # $fh gets auto-closed() by SVN::TxDelta::apply(),
+ # (but $base does not,) so dup() it for reading in close_file
+ open my $dup, '<&', $fh or croak $!;
+ my $base = $::_repository->temp_acquire("git_blob_${$}_$suffix");
+ # close_file may call temp_acquire on 'svn_hash', but because of the
+ # call chain, if the temp_acquire call from close_file ends up being the
+ # call that first creates the 'svn_hash' temp file, then the FileHandle
+ # that's created as a result will end up in an SVN::Pool that we clear
+ # in SVN::Ra::gs_fetch_loop_common. Avoid that by making sure the
+ # 'svn_hash' FileHandle is already created before close_file is called.
+ my $tmp_fh = $::_repository->temp_acquire('svn_hash');
+ $::_repository->temp_release($tmp_fh, 1);
+
+ if ($fb->{blob}) {
+ my ($base_is_link, $size);
+
+ if ($fb->{mode_a} eq '120000' &&
+ ! $self->{empty_symlinks}->{$fb->{path}}) {
+ print $base 'link ' or die "print $!\n";
+ $base_is_link = 1;
+ }
+ retry:
+ $size = $::_repository->cat_blob($fb->{blob}, $base);
+ die "Failed to read object $fb->{blob}" if ($size < 0);
+
+ if (defined $exp) {
+ seek $base, 0, 0 or croak $!;
+ my $got = ::md5sum($base);
+ if ($got ne $exp) {
+ my $err = "Checksum mismatch: ".
+ "$fb->{path} $fb->{blob}\n" .
+ "expected: $exp\n" .
+ " got: $got\n";
+ if ($base_is_link) {
+ warn $err,
+ "Retrying... (possibly ",
+ "a bad symlink from SVN)\n";
+ $::_repository->temp_reset($base);
+ $base_is_link = 0;
+ goto retry;
+ }
+ die $err;
+ }
+ }
+ }
+ seek $base, 0, 0 or croak $!;
+ $fb->{fh} = $fh;
+ $fb->{base} = $base;
+ [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
+}
+
+sub close_file {
+ my ($self, $fb, $exp) = @_;
+ return undef if $self->is_path_ignored($fb->{path});
+
+ my $hash;
+ my $path = $self->git_path($fb->{path});
+ if (my $fh = $fb->{fh}) {
+ if (defined $exp) {
+ seek($fh, 0, 0) or croak $!;
+ my $got = ::md5sum($fh);
+ if ($got ne $exp) {
+ die "Checksum mismatch: $path\n",
+ "expected: $exp\n got: $got\n";
+ }
+ }
+ if ($fb->{mode_b} == 120000) {
+ sysseek($fh, 0, 0) or croak $!;
+ my $rd = sysread($fh, my $buf, 5);
+
+ if (!defined $rd) {
+ croak "sysread: $!\n";
+ } elsif ($rd == 0) {
+ warn "$path has mode 120000",
+ " but it points to nothing\n",
+ "converting to an empty file with mode",
+ " 100644\n";
+ $fb->{mode_b} = '100644';
+ } elsif ($buf ne 'link ') {
+ warn "$path has mode 120000",
+ " but is not a link\n";
+ } else {
+ my $tmp_fh = $::_repository->temp_acquire(
+ 'svn_hash');
+ my $res;
+ while ($res = sysread($fh, my $str, 1024)) {
+ my $out = syswrite($tmp_fh, $str, $res);
+ defined($out) && $out == $res
+ or croak("write ",
+ Git::temp_path($tmp_fh),
+ ": $!\n");
+ }
+ defined $res or croak $!;
+
+ ($fh, $tmp_fh) = ($tmp_fh, $fh);
+ Git::temp_release($tmp_fh, 1);
+ }
+ }
+
+ $hash = $::_repository->hash_and_insert_object(
+ Git::temp_path($fh));
+ $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
+
+ Git::temp_release($fb->{base}, 1);
+ Git::temp_release($fh, 1);
+ } else {
+ $hash = $fb->{blob} or die "no blob information\n";
+ }
+ $fb->{pool}->clear;
+ $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
+ print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
+ undef;
+}
+
+sub abort_edit {
+ my $self = shift;
+ $self->{nr} = $self->{gii}->{nr};
+ delete $self->{gii};
+ $self->SUPER::abort_edit(@_);
+}
+
+sub close_edit {
+ my $self = shift;
+
+ if ($_preserve_empty_dirs) {
+ my @empty_dirs;
+
+ # Any entry flagged as empty that also has an associated
+ # dir_prop represents a newly created empty directory.
+ foreach my $i (keys %{$self->{empty}}) {
+ push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
+ }
+
+ # Search for directories that have become empty due subsequent
+ # file deletes.
+ push @empty_dirs, $self->find_empty_directories();
+
+ # Finally, add a placeholder file to each empty directory.
+ $self->add_placeholder_file($_) foreach (@empty_dirs);
+
+ $self->stash_placeholder_list();
+ }
+
+ $self->{git_commit_ok} = 1;
+ $self->{nr} = $self->{gii}->{nr};
+ delete $self->{gii};
+ $self->SUPER::close_edit(@_);
+}
+
+sub find_empty_directories {
+ my ($self) = @_;
+ my @empty_dirs;
+ my %dirs = map { dirname($_) => 1 } @deleted_gpath;
+
+ foreach my $dir (sort keys %dirs) {
+ next if $dir eq ".";
+
+ # If there have been any additions to this directory, there is
+ # no reason to check if it is empty.
+ my $skip_added = 0;
+ foreach my $t (qw/dir_prop file_prop/) {
+ foreach my $path (keys %{ $self->{$t} }) {
+ if (exists $self->{$t}->{dirname($path)}) {
+ $skip_added = 1;
+ last;
+ }
+ }
+ last if $skip_added;
+ }
+ next if $skip_added;
+
+ # Use `git ls-tree` to get the filenames of this directory
+ # that existed prior to this particular commit.
+ my $ls = command('ls-tree', '-z', '--name-only',
+ $self->{c}, "$dir/");
+ my %files = map { $_ => 1 } split(/\0/, $ls);
+
+ # Remove the filenames that were deleted during this commit.
+ delete $files{$_} foreach (@deleted_gpath);
+
+ # Report the directory if there are no filenames left.
+ push @empty_dirs, $dir unless (scalar %files);
+ }
+ @empty_dirs;
+}
+
+sub add_placeholder_file {
+ my ($self, $dir) = @_;
+ my $path = "$dir/$_placeholder_filename";
+ my $gpath = $self->git_path($path);
+
+ my $fh = $::_repository->temp_acquire($gpath);
+ my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
+ Git::temp_release($fh, 1);
+ $self->{gii}->update('100644', $hash, $gpath) or croak $!;
+
+ # The directory should no longer be considered empty.
+ delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
+
+ # Keep track of any placeholder files we create.
+ $added_placeholder{$dir} = $path;
+}
+
+sub stash_placeholder_list {
+ my ($self) = @_;
+ my $k = "svn-remote.$repo_id.added-placeholder";
+ my $v = eval { command_oneline('config', '--get-all', $k) };
+ command_noisy('config', '--unset-all', $k) if $v;
+ foreach (values %added_placeholder) {
+ command_noisy('config', '--add', $k, $_);
+ }
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
+
+=head1 SYNOPSIS
+
+ use SVN::Core;
+ use SVN::Ra;
+ use Git::SVN;
+ use Git::SVN::Fetcher;
+ use Git;
+
+ my $gs = Git::SVN->find_by_url($url);
+ my $ra = SVN::Ra->new(url => $url);
+ my $editor = Git::SVN::Fetcher->new($gs);
+ my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
+ 1, $editor);
+ $reporter->set_path('', $old_rev, 0);
+ $reporter->finish_report;
+ my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
+
+ foreach my $path (keys %{$editor->{dir_prop}) {
+ my $props = $editor->{dir_prop}{$path};
+ foreach my $prop (keys %$props) {
+ print "property $prop at $path changed to $props->{$prop}\n";
+ }
+ }
+ foreach my $path (keys %{$editor->{empty}) {
+ my $action = $editor->{empty}{$path} ? 'added' : 'removed';
+ print "empty directory $path $action\n";
+ }
+ foreach my $path (keys %{$editor->{file_prop}) { ... }
+ foreach my $parent (keys %{$editor->{absent_dir}}) {
+ my @children = @{$editor->{abstent_dir}{$parent}};
+ print "cannot fetch directory $parent/$_: not authorized?\n"
+ foreach @children;
+ }
+ foreach my $parent (keys %{$editor->{absent_file}) { ... }
+
+=head1 DESCRIPTION
+
+This is a subclass of C<SVN::Delta::Editor>, which means it implements
+callbacks to act as a consumer of Subversion tree deltas. This
+particular implementation of those callbacks is meant to store
+information about the resulting content which B<git svn fetch> could
+use to populate new commits and new entries for F<unhandled.log>.
+More specifically:
+
+=over
+
+=item * Additions, removals, and modifications of files are propagated
+to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
+B<git update-index>.
+
+=item * Changes in Subversion path properties are recorded in the
+C<dir_prop> and C<file_prop> fields (which are hashes).
+
+=item * Addition and removal of empty directories are indicated by
+entries with value 1 and 0 respectively in the C<empty> hash.
+
+=item * Paths that are present but cannot be conveyed (presumably due
+to permissions) are recorded in the C<absent_file> and
+C<absent_dirs> hashes. For each key, the corresponding value is
+a list of paths under that directory that were present but
+could not be conveyed.
+
+=back
+
+The interface is unstable. Do not use this module unless you are
+developing git-svn.
+
+=head1 DEPENDENCIES
+
+L<SVN::Delta> from the Subversion perl bindings,
+the core L<Carp> and L<File::Basename> modules,
+and git's L<Git> helper module.
+
+C<Git::SVN::Fetcher> has not been tested using callers other than
+B<git-svn> itself.
+
+=head1 SEE ALSO
+
+L<SVN::Delta>,
+L<Git::SVN::Editor>.
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+=head1 BUGS
+
+None.
diff --git a/perl/Git/SVN/GlobSpec.pm b/perl/Git/SVN/GlobSpec.pm
new file mode 100644
index 0000000000..a0a8d17621
--- /dev/null
+++ b/perl/Git/SVN/GlobSpec.pm
@@ -0,0 +1,65 @@
+package Git::SVN::GlobSpec;
+use strict;
+use warnings;
+
+sub new {
+ my ($class, $glob, $pattern_ok) = @_;
+ my $re = $glob;
+ $re =~ s!/+$!!g; # no need for trailing slashes
+ my (@left, @right, @patterns);
+ my $state = "left";
+ my $die_msg = "Only one set of wildcards " .
+ "(e.g. '*' or '*/*/*') is supported: $glob\n";
+ for my $part (split(m|/|, $glob)) {
+ if ($pattern_ok && $part =~ /[{}]/ &&
+ $part !~ /^\{[^{}]+\}/) {
+ die "Invalid pattern in '$glob': $part\n";
+ }
+ my $nstars = $part =~ tr/*//;
+ if ($nstars > 1) {
+ die "Only one '*' is allowed in a pattern: '$part'\n";
+ }
+ if ($part =~ /(.*)\*(.*)/) {
+ die $die_msg if $state eq "right";
+ my ($l, $r) = ($1, $2);
+ $state = "pattern";
+ my $pat = quotemeta($l) . '[^/]*' . quotemeta($r);
+ push(@patterns, $pat);
+ } elsif ($pattern_ok && $part =~ /^\{(.*)\}$/) {
+ die $die_msg if $state eq "right";
+ $state = "pattern";
+ my $p = quotemeta($1);
+ $p =~ s/\\,/|/g;
+ push(@patterns, "(?:$p)");
+ } else {
+ if ($state eq "left") {
+ push(@left, $part);
+ } else {
+ push(@right, $part);
+ $state = "right";
+ }
+ }
+ }
+ my $depth = @patterns;
+ if ($depth == 0) {
+ die "One '*' is needed in glob: '$glob'\n";
+ }
+ my $left = join('/', @left);
+ my $right = join('/', @right);
+ $re = join('/', @patterns);
+ $re = join('\/',
+ grep(length, quotemeta($left),
+ "($re)(?=/|\$)",
+ quotemeta($right)));
+ my $left_re = qr/^\/\Q$left\E(\/|$)/;
+ bless { left => $left, right => $right, left_regex => $left_re,
+ regex => qr/$re/, glob => $glob, depth => $depth }, $class;
+}
+
+sub full_path {
+ my ($self, $path) = @_;
+ return (length $self->{left} ? "$self->{left}/" : '') .
+ $path . (length $self->{right} ? "/$self->{right}" : '');
+}
+
+1;
diff --git a/perl/Git/SVN/Log.pm b/perl/Git/SVN/Log.pm
new file mode 100644
index 0000000000..664105357c
--- /dev/null
+++ b/perl/Git/SVN/Log.pm
@@ -0,0 +1,400 @@
+package Git::SVN::Log;
+use strict;
+use warnings;
+use Git::SVN::Utils qw(fatal);
+use Git qw(command
+ command_oneline
+ command_output_pipe
+ command_close_pipe
+ get_tz_offset);
+use POSIX qw/strftime/;
+use constant commit_log_separator => ('-' x 72) . "\n";
+use vars qw/$TZ $limit $color $pager $non_recursive $verbose $oneline
+ %rusers $show_commit $incremental/;
+
+# Option set in git-svn
+our $_git_format;
+
+sub cmt_showable {
+ my ($c) = @_;
+ return 1 if defined $c->{r};
+
+ # big commit message got truncated by the 16k pretty buffer in rev-list
+ if ($c->{l} && $c->{l}->[-1] eq "...\n" &&
+ $c->{a_raw} =~ /\@([a-f\d\-]+)>$/) {
+ @{$c->{l}} = ();
+ my @log = command(qw/cat-file commit/, $c->{c});
+
+ # shift off the headers
+ shift @log while ($log[0] ne '');
+ shift @log;
+
+ # TODO: make $c->{l} not have a trailing newline in the future
+ @{$c->{l}} = map { "$_\n" } grep !/^git-svn-id: /, @log;
+
+ (undef, $c->{r}, undef) = ::extract_metadata(
+ (grep(/^git-svn-id: /, @log))[-1]);
+ }
+ return defined $c->{r};
+}
+
+sub log_use_color {
+ return $color || Git->repository->get_colorbool('color.diff');
+}
+
+sub git_svn_log_cmd {
+ my ($r_min, $r_max, @args) = @_;
+ my $head = 'HEAD';
+ my (@files, @log_opts);
+ foreach my $x (@args) {
+ if ($x eq '--' || @files) {
+ push @files, $x;
+ } else {
+ if (::verify_ref("$x^0")) {
+ $head = $x;
+ } else {
+ push @log_opts, $x;
+ }
+ }
+ }
+
+ my ($url, $rev, $uuid, $gs) = ::working_head_info($head);
+
+ require Git::SVN;
+ $gs ||= Git::SVN->_new;
+ my @cmd = (qw/log --abbrev-commit --pretty=raw --default/,
+ $gs->refname);
+ push @cmd, '-r' unless $non_recursive;
+ push @cmd, qw/--raw --name-status/ if $verbose;
+ push @cmd, '--color' if log_use_color();
+ push @cmd, @log_opts;
+ if (defined $r_max && $r_max == $r_min) {
+ push @cmd, '--max-count=1';
+ if (my $c = $gs->rev_map_get($r_max)) {
+ push @cmd, $c;
+ }
+ } elsif (defined $r_max) {
+ if ($r_max < $r_min) {
+ ($r_min, $r_max) = ($r_max, $r_min);
+ }
+ my (undef, $c_max) = $gs->find_rev_before($r_max, 1, $r_min);
+ my (undef, $c_min) = $gs->find_rev_after($r_min, 1, $r_max);
+ # If there are no commits in the range, both $c_max and $c_min
+ # will be undefined. If there is at least 1 commit in the
+ # range, both will be defined.
+ return () if !defined $c_min || !defined $c_max;
+ if ($c_min eq $c_max) {
+ push @cmd, '--max-count=1', $c_min;
+ } else {
+ push @cmd, '--boundary', "$c_min..$c_max";
+ }
+ }
+ return (@cmd, @files);
+}
+
+# adapted from pager.c
+sub config_pager {
+ if (! -t *STDOUT) {
+ $ENV{GIT_PAGER_IN_USE} = 'false';
+ $pager = undef;
+ return;
+ }
+ chomp($pager = command_oneline(qw(var GIT_PAGER)));
+ if ($pager eq 'cat') {
+ $pager = undef;
+ }
+ $ENV{GIT_PAGER_IN_USE} = defined($pager);
+}
+
+sub run_pager {
+ return unless defined $pager;
+ pipe my ($rfd, $wfd) or return;
+ defined(my $pid = fork) or fatal "Can't fork: $!";
+ if (!$pid) {
+ open STDOUT, '>&', $wfd or
+ fatal "Can't redirect to stdout: $!";
+ return;
+ }
+ open STDIN, '<&', $rfd or fatal "Can't redirect stdin: $!";
+ $ENV{LESS} ||= 'FRX';
+ $ENV{LV} ||= '-c';
+ exec $pager or fatal "Can't run pager: $! ($pager)";
+}
+
+sub format_svn_date {
+ my $t = shift || time;
+ require Git::SVN;
+ my $gmoff = get_tz_offset($t);
+ return strftime("%Y-%m-%d %H:%M:%S $gmoff (%a, %d %b %Y)", localtime($t));
+}
+
+sub parse_git_date {
+ my ($t, $tz) = @_;
+ # Date::Parse isn't in the standard Perl distro :(
+ if ($tz =~ s/^\+//) {
+ $t += tz_to_s_offset($tz);
+ } elsif ($tz =~ s/^\-//) {
+ $t -= tz_to_s_offset($tz);
+ }
+ return $t;
+}
+
+sub set_local_timezone {
+ if (defined $TZ) {
+ $ENV{TZ} = $TZ;
+ } else {
+ delete $ENV{TZ};
+ }
+}
+
+sub tz_to_s_offset {
+ my ($tz) = @_;
+ $tz =~ s/(\d\d)$//;
+ return ($1 * 60) + ($tz * 3600);
+}
+
+sub get_author_info {
+ my ($dest, $author, $t, $tz) = @_;
+ $author =~ s/(?:^\s*|\s*$)//g;
+ $dest->{a_raw} = $author;
+ my $au;
+ if ($::_authors) {
+ $au = $rusers{$author} || undef;
+ }
+ if (!$au) {
+ ($au) = ($author =~ /<([^>]+)\@[^>]+>$/);
+ }
+ $dest->{t} = $t;
+ $dest->{tz} = $tz;
+ $dest->{a} = $au;
+ $dest->{t_utc} = parse_git_date($t, $tz);
+}
+
+sub process_commit {
+ my ($c, $r_min, $r_max, $defer) = @_;
+ if (defined $r_min && defined $r_max) {
+ if ($r_min == $c->{r} && $r_min == $r_max) {
+ show_commit($c);
+ return 0;
+ }
+ return 1 if $r_min == $r_max;
+ if ($r_min < $r_max) {
+ # we need to reverse the print order
+ return 0 if (defined $limit && --$limit < 0);
+ push @$defer, $c;
+ return 1;
+ }
+ if ($r_min != $r_max) {
+ return 1 if ($r_min < $c->{r});
+ return 1 if ($r_max > $c->{r});
+ }
+ }
+ return 0 if (defined $limit && --$limit < 0);
+ show_commit($c);
+ return 1;
+}
+
+my $l_fmt;
+sub show_commit {
+ my $c = shift;
+ if ($oneline) {
+ my $x = "\n";
+ if (my $l = $c->{l}) {
+ while ($l->[0] =~ /^\s*$/) { shift @$l }
+ $x = $l->[0];
+ }
+ $l_fmt ||= 'A' . length($c->{r});
+ print 'r',pack($l_fmt, $c->{r}),' | ';
+ print "$c->{c} | " if $show_commit;
+ print $x;
+ } else {
+ show_commit_normal($c);
+ }
+}
+
+sub show_commit_changed_paths {
+ my ($c) = @_;
+ return unless $c->{changed};
+ print "Changed paths:\n", @{$c->{changed}};
+}
+
+sub show_commit_normal {
+ my ($c) = @_;
+ print commit_log_separator, "r$c->{r} | ";
+ print "$c->{c} | " if $show_commit;
+ print "$c->{a} | ", format_svn_date($c->{t_utc}), ' | ';
+ my $nr_line = 0;
+
+ if (my $l = $c->{l}) {
+ while ($l->[$#$l] eq "\n" && $#$l > 0
+ && $l->[($#$l - 1)] eq "\n") {
+ pop @$l;
+ }
+ $nr_line = scalar @$l;
+ if (!$nr_line) {
+ print "1 line\n\n\n";
+ } else {
+ if ($nr_line == 1) {
+ $nr_line = '1 line';
+ } else {
+ $nr_line .= ' lines';
+ }
+ print $nr_line, "\n";
+ show_commit_changed_paths($c);
+ print "\n";
+ print $_ foreach @$l;
+ }
+ } else {
+ print "1 line\n";
+ show_commit_changed_paths($c);
+ print "\n";
+
+ }
+ foreach my $x (qw/raw stat diff/) {
+ if ($c->{$x}) {
+ print "\n";
+ print $_ foreach @{$c->{$x}}
+ }
+ }
+}
+
+sub cmd_show_log {
+ my (@args) = @_;
+ my ($r_min, $r_max);
+ my $r_last = -1; # prevent dupes
+ set_local_timezone();
+ if (defined $::_revision) {
+ if ($::_revision =~ /^(\d+):(\d+)$/) {
+ ($r_min, $r_max) = ($1, $2);
+ } elsif ($::_revision =~ /^\d+$/) {
+ $r_min = $r_max = $::_revision;
+ } else {
+ fatal "-r$::_revision is not supported, use ",
+ "standard 'git log' arguments instead";
+ }
+ }
+
+ config_pager();
+ @args = git_svn_log_cmd($r_min, $r_max, @args);
+ if (!@args) {
+ print commit_log_separator unless $incremental || $oneline;
+ return;
+ }
+ my $log = command_output_pipe(@args);
+ run_pager();
+ my (@k, $c, $d, $stat);
+ my $esc_color = qr/(?:\033\[(?:(?:\d+;)*\d*)?m)*/;
+ while (<$log>) {
+ if (/^${esc_color}commit (?:- )?($::sha1_short)/o) {
+ my $cmt = $1;
+ if ($c && cmt_showable($c) && $c->{r} != $r_last) {
+ $r_last = $c->{r};
+ process_commit($c, $r_min, $r_max, \@k) or
+ goto out;
+ }
+ $d = undef;
+ $c = { c => $cmt };
+ } elsif (/^${esc_color}author (.+) (\d+) ([\-\+]?\d+)$/o) {
+ get_author_info($c, $1, $2, $3);
+ } elsif (/^${esc_color}(?:tree|parent|committer) /o) {
+ # ignore
+ } elsif (/^${esc_color}:\d{6} \d{6} $::sha1_short/o) {
+ push @{$c->{raw}}, $_;
+ } elsif (/^${esc_color}[ACRMDT]\t/) {
+ # we could add $SVN->{svn_path} here, but that requires
+ # remote access at the moment (repo_path_split)...
+ s#^(${esc_color})([ACRMDT])\t#$1 $2 #o;
+ push @{$c->{changed}}, $_;
+ } elsif (/^${esc_color}diff /o) {
+ $d = 1;
+ push @{$c->{diff}}, $_;
+ } elsif ($d) {
+ push @{$c->{diff}}, $_;
+ } elsif (/^\ .+\ \|\s*\d+\ $esc_color[\+\-]*
+ $esc_color*[\+\-]*$esc_color$/x) {
+ $stat = 1;
+ push @{$c->{stat}}, $_;
+ } elsif ($stat && /^ \d+ files changed, \d+ insertions/) {
+ push @{$c->{stat}}, $_;
+ $stat = undef;
+ } elsif (/^${esc_color} (git-svn-id:.+)$/o) {
+ ($c->{url}, $c->{r}, undef) = ::extract_metadata($1);
+ } elsif (s/^${esc_color} //o) {
+ push @{$c->{l}}, $_;
+ }
+ }
+ if ($c && defined $c->{r} && $c->{r} != $r_last) {
+ $r_last = $c->{r};
+ process_commit($c, $r_min, $r_max, \@k);
+ }
+ if (@k) {
+ ($r_min, $r_max) = ($r_max, $r_min);
+ process_commit($_, $r_min, $r_max) foreach reverse @k;
+ }
+out:
+ close $log;
+ print commit_log_separator unless $incremental || $oneline;
+}
+
+sub cmd_blame {
+ my $path = pop;
+
+ config_pager();
+ run_pager();
+
+ my ($fh, $ctx, $rev);
+
+ if ($_git_format) {
+ ($fh, $ctx) = command_output_pipe('blame', @_, $path);
+ while (my $line = <$fh>) {
+ if ($line =~ /^\^?([[:xdigit:]]+)\s/) {
+ # Uncommitted edits show up as a rev ID of
+ # all zeros, which we can't look up with
+ # cmt_metadata
+ if ($1 !~ /^0+$/) {
+ (undef, $rev, undef) =
+ ::cmt_metadata($1);
+ $rev = '0' if (!$rev);
+ } else {
+ $rev = '0';
+ }
+ $rev = sprintf('%-10s', $rev);
+ $line =~ s/^\^?[[:xdigit:]]+(\s)/$rev$1/;
+ }
+ print $line;
+ }
+ } else {
+ ($fh, $ctx) = command_output_pipe('blame', '-p', @_, 'HEAD',
+ '--', $path);
+ my ($sha1);
+ my %authors;
+ my @buffer;
+ my %dsha; #distinct sha keys
+
+ while (my $line = <$fh>) {
+ push @buffer, $line;
+ if ($line =~ /^([[:xdigit:]]{40})\s\d+\s\d+/) {
+ $dsha{$1} = 1;
+ }
+ }
+
+ my $s2r = ::cmt_sha2rev_batch([keys %dsha]);
+
+ foreach my $line (@buffer) {
+ if ($line =~ /^([[:xdigit:]]{40})\s\d+\s\d+/) {
+ $rev = $s2r->{$1};
+ $rev = '0' if (!$rev)
+ }
+ elsif ($line =~ /^author (.*)/) {
+ $authors{$rev} = $1;
+ $authors{$rev} =~ s/\s/_/g;
+ }
+ elsif ($line =~ /^\t(.*)$/) {
+ printf("%6s %10s %s\n", $rev, $authors{$rev}, $1);
+ }
+ }
+ }
+ command_close_pipe($fh, $ctx);
+}
+
+1;
diff --git a/perl/Git/SVN/Memoize/YAML.pm b/perl/Git/SVN/Memoize/YAML.pm
new file mode 100644
index 0000000000..9676b8f2f7
--- /dev/null
+++ b/perl/Git/SVN/Memoize/YAML.pm
@@ -0,0 +1,93 @@
+package Git::SVN::Memoize::YAML;
+use warnings;
+use strict;
+use YAML::Any ();
+
+# based on Memoize::Storable.
+
+sub TIEHASH {
+ my $package = shift;
+ my $filename = shift;
+ my $truehash = (-e $filename) ? YAML::Any::LoadFile($filename) : {};
+ my $self = {FILENAME => $filename, H => $truehash};
+ bless $self => $package;
+}
+
+sub STORE {
+ my $self = shift;
+ $self->{H}{$_[0]} = $_[1];
+}
+
+sub FETCH {
+ my $self = shift;
+ $self->{H}{$_[0]};
+}
+
+sub EXISTS {
+ my $self = shift;
+ exists $self->{H}{$_[0]};
+}
+
+sub DESTROY {
+ my $self = shift;
+ YAML::Any::DumpFile($self->{FILENAME}, $self->{H});
+}
+
+sub SCALAR {
+ my $self = shift;
+ scalar(%{$self->{H}});
+}
+
+sub FIRSTKEY {
+ 'Fake hash from Git::SVN::Memoize::YAML';
+}
+
+sub NEXTKEY {
+ undef;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Git::SVN::Memoize::YAML - store Memoized data in YAML format
+
+=head1 SYNOPSIS
+
+ use Memoize;
+ use Git::SVN::Memoize::YAML;
+
+ tie my %cache => 'Git::SVN::Memoize::YAML', $filename;
+ memoize('slow_function', SCALAR_CACHE => [HASH => \%cache]);
+ slow_function(arguments);
+
+=head1 DESCRIPTION
+
+This module provides a class that can be used to tie a hash to a
+YAML file. The file is read when the hash is initialized and
+rewritten when the hash is destroyed.
+
+The intent is to allow L<Memoize> to back its cache with a file in
+YAML format, just like L<Memoize::Storable> allows L<Memoize> to
+back its cache with a file in Storable format. Unlike the Storable
+format, the YAML format is platform-independent and fairly stable.
+
+Carps on error.
+
+=head1 DIAGNOSTICS
+
+See L<YAML::Any>.
+
+=head1 DEPENDENCIES
+
+L<YAML::Any> from CPAN.
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+=head1 BUGS
+
+The entire cache is read into a Perl hash when loading the file,
+so this is not very scalable.
diff --git a/perl/Git/SVN/Migration.pm b/perl/Git/SVN/Migration.pm
new file mode 100644
index 0000000000..cf6ffa7581
--- /dev/null
+++ b/perl/Git/SVN/Migration.pm
@@ -0,0 +1,258 @@
+package Git::SVN::Migration;
+# these version numbers do NOT correspond to actual version numbers
+# of git or git-svn. They are just relative.
+#
+# v0 layout: .git/$id/info/url, refs/heads/$id-HEAD
+#
+# v1 layout: .git/$id/info/url, refs/remotes/$id
+#
+# v2 layout: .git/svn/$id/info/url, refs/remotes/$id
+#
+# v3 layout: .git/svn/$id, refs/remotes/$id
+# - info/url may remain for backwards compatibility
+# - this is what we migrate up to this layout automatically,
+# - this will be used by git svn init on single branches
+# v3.1 layout (auto migrated):
+# - .rev_db => .rev_db.$UUID, .rev_db will remain as a symlink
+# for backwards compatibility
+#
+# v4 layout: .git/svn/$repo_id/$id, refs/remotes/$repo_id/$id
+# - this is only created for newly multi-init-ed
+# repositories. Similar in spirit to the
+# --use-separate-remotes option in git-clone (now default)
+# - we do not automatically migrate to this (following
+# the example set by core git)
+#
+# v5 layout: .rev_db.$UUID => .rev_map.$UUID
+# - newer, more-efficient format that uses 24-bytes per record
+# with no filler space.
+# - use xxd -c24 < .rev_map.$UUID to view and debug
+# - This is a one-way migration, repositories updated to the
+# new format will not be able to use old git-svn without
+# rebuilding the .rev_db. Rebuilding the rev_db is not
+# possible if noMetadata or useSvmProps are set; but should
+# be no problem for users that use the (sensible) defaults.
+use strict;
+use warnings;
+use Carp qw/croak/;
+use File::Path qw/mkpath/;
+use File::Basename qw/dirname basename/;
+
+our $_minimize;
+use Git qw(
+ command
+ command_noisy
+ command_output_pipe
+ command_close_pipe
+);
+
+sub migrate_from_v0 {
+ my $git_dir = $ENV{GIT_DIR};
+ return undef unless -d $git_dir;
+ my ($fh, $ctx) = command_output_pipe(qw/rev-parse --symbolic --all/);
+ my $migrated = 0;
+ while (<$fh>) {
+ chomp;
+ my ($id, $orig_ref) = ($_, $_);
+ next unless $id =~ s#^refs/heads/(.+)-HEAD$#$1#;
+ next unless -f "$git_dir/$id/info/url";
+ my $new_ref = "refs/remotes/$id";
+ if (::verify_ref("$new_ref^0")) {
+ print STDERR "W: $orig_ref is probably an old ",
+ "branch used by an ancient version of ",
+ "git-svn.\n",
+ "However, $new_ref also exists.\n",
+ "We will not be able ",
+ "to use this branch until this ",
+ "ambiguity is resolved.\n";
+ next;
+ }
+ print STDERR "Migrating from v0 layout...\n" if !$migrated;
+ print STDERR "Renaming ref: $orig_ref => $new_ref\n";
+ command_noisy('update-ref', $new_ref, $orig_ref);
+ command_noisy('update-ref', '-d', $orig_ref, $orig_ref);
+ $migrated++;
+ }
+ command_close_pipe($fh, $ctx);
+ print STDERR "Done migrating from v0 layout...\n" if $migrated;
+ $migrated;
+}
+
+sub migrate_from_v1 {
+ my $git_dir = $ENV{GIT_DIR};
+ my $migrated = 0;
+ return $migrated unless -d $git_dir;
+ my $svn_dir = "$git_dir/svn";
+
+ # just in case somebody used 'svn' as their $id at some point...
+ return $migrated if -d $svn_dir && ! -f "$svn_dir/info/url";
+
+ print STDERR "Migrating from a git-svn v1 layout...\n";
+ mkpath([$svn_dir]);
+ print STDERR "Data from a previous version of git-svn exists, but\n\t",
+ "$svn_dir\n\t(required for this version ",
+ "($::VERSION) of git-svn) does not exist.\n";
+ my ($fh, $ctx) = command_output_pipe(qw/rev-parse --symbolic --all/);
+ while (<$fh>) {
+ my $x = $_;
+ next unless $x =~ s#^refs/remotes/##;
+ chomp $x;
+ next unless -f "$git_dir/$x/info/url";
+ my $u = eval { ::file_to_s("$git_dir/$x/info/url") };
+ next unless $u;
+ my $dn = dirname("$git_dir/svn/$x");
+ mkpath([$dn]) unless -d $dn;
+ if ($x eq 'svn') { # they used 'svn' as GIT_SVN_ID:
+ mkpath(["$git_dir/svn/svn"]);
+ print STDERR " - $git_dir/$x/info => ",
+ "$git_dir/svn/$x/info\n";
+ rename "$git_dir/$x/info", "$git_dir/svn/$x/info" or
+ croak "$!: $x";
+ # don't worry too much about these, they probably
+ # don't exist with repos this old (save for index,
+ # and we can easily regenerate that)
+ foreach my $f (qw/unhandled.log index .rev_db/) {
+ rename "$git_dir/$x/$f", "$git_dir/svn/$x/$f";
+ }
+ } else {
+ print STDERR " - $git_dir/$x => $git_dir/svn/$x\n";
+ rename "$git_dir/$x", "$git_dir/svn/$x" or
+ croak "$!: $x";
+ }
+ $migrated++;
+ }
+ command_close_pipe($fh, $ctx);
+ print STDERR "Done migrating from a git-svn v1 layout\n";
+ $migrated;
+}
+
+sub read_old_urls {
+ my ($l_map, $pfx, $path) = @_;
+ my @dir;
+ foreach (<$path/*>) {
+ if (-r "$_/info/url") {
+ $pfx .= '/' if $pfx && $pfx !~ m!/$!;
+ my $ref_id = $pfx . basename $_;
+ my $url = ::file_to_s("$_/info/url");
+ $l_map->{$ref_id} = $url;
+ } elsif (-d $_) {
+ push @dir, $_;
+ }
+ }
+ foreach (@dir) {
+ my $x = $_;
+ $x =~ s!^\Q$ENV{GIT_DIR}\E/svn/!!o;
+ read_old_urls($l_map, $x, $_);
+ }
+}
+
+sub migrate_from_v2 {
+ my @cfg = command(qw/config -l/);
+ return if grep /^svn-remote\..+\.url=/, @cfg;
+ my %l_map;
+ read_old_urls(\%l_map, '', "$ENV{GIT_DIR}/svn");
+ my $migrated = 0;
+
+ require Git::SVN;
+ foreach my $ref_id (sort keys %l_map) {
+ eval { Git::SVN->init($l_map{$ref_id}, '', undef, $ref_id) };
+ if ($@) {
+ Git::SVN->init($l_map{$ref_id}, '', $ref_id, $ref_id);
+ }
+ $migrated++;
+ }
+ $migrated;
+}
+
+sub minimize_connections {
+ require Git::SVN;
+ require Git::SVN::Ra;
+
+ my $r = Git::SVN::read_all_remotes();
+ my $new_urls = {};
+ my $root_repos = {};
+ foreach my $repo_id (keys %$r) {
+ my $url = $r->{$repo_id}->{url} or next;
+ my $fetch = $r->{$repo_id}->{fetch} or next;
+ my $ra = Git::SVN::Ra->new($url);
+
+ # skip existing cases where we already connect to the root
+ if (($ra->url eq $ra->{repos_root}) ||
+ ($ra->{repos_root} eq $repo_id)) {
+ $root_repos->{$ra->url} = $repo_id;
+ next;
+ }
+
+ my $root_ra = Git::SVN::Ra->new($ra->{repos_root});
+ my $root_path = $ra->url;
+ $root_path =~ s#^\Q$ra->{repos_root}\E(/|$)##;
+ foreach my $path (keys %$fetch) {
+ my $ref_id = $fetch->{$path};
+ my $gs = Git::SVN->new($ref_id, $repo_id, $path);
+
+ # make sure we can read when connecting to
+ # a higher level of a repository
+ my ($last_rev, undef) = $gs->last_rev_commit;
+ if (!defined $last_rev) {
+ $last_rev = eval {
+ $root_ra->get_latest_revnum;
+ };
+ next if $@;
+ }
+ my $new = $root_path;
+ $new .= length $path ? "/$path" : '';
+ eval {
+ $root_ra->get_log([$new], $last_rev, $last_rev,
+ 0, 0, 1, sub { });
+ };
+ next if $@;
+ $new_urls->{$ra->{repos_root}}->{$new} =
+ { ref_id => $ref_id,
+ old_repo_id => $repo_id,
+ old_path => $path };
+ }
+ }
+
+ my @emptied;
+ foreach my $url (keys %$new_urls) {
+ # see if we can re-use an existing [svn-remote "repo_id"]
+ # instead of creating a(n ugly) new section:
+ my $repo_id = $root_repos->{$url} || $url;
+
+ my $fetch = $new_urls->{$url};
+ foreach my $path (keys %$fetch) {
+ my $x = $fetch->{$path};
+ Git::SVN->init($url, $path, $repo_id, $x->{ref_id});
+ my $pfx = "svn-remote.$x->{old_repo_id}";
+
+ my $old_fetch = quotemeta("$x->{old_path}:".
+ "$x->{ref_id}");
+ command_noisy(qw/config --unset/,
+ "$pfx.fetch", '^'. $old_fetch . '$');
+ delete $r->{$x->{old_repo_id}}->
+ {fetch}->{$x->{old_path}};
+ if (!keys %{$r->{$x->{old_repo_id}}->{fetch}}) {
+ command_noisy(qw/config --unset/,
+ "$pfx.url");
+ push @emptied, $x->{old_repo_id}
+ }
+ }
+ }
+ if (@emptied) {
+ my $file = $ENV{GIT_CONFIG} || "$ENV{GIT_DIR}/config";
+ print STDERR <<EOF;
+The following [svn-remote] sections in your config file ($file) are empty
+and can be safely removed:
+EOF
+ print STDERR "[svn-remote \"$_\"]\n" foreach @emptied;
+ }
+}
+
+sub migration_check {
+ migrate_from_v0();
+ migrate_from_v1();
+ migrate_from_v2();
+ minimize_connections() if $_minimize;
+}
+
+1;
diff --git a/perl/Git/SVN/Prompt.pm b/perl/Git/SVN/Prompt.pm
new file mode 100644
index 0000000000..e940b08505
--- /dev/null
+++ b/perl/Git/SVN/Prompt.pm
@@ -0,0 +1,184 @@
+package Git::SVN::Prompt;
+use strict;
+use warnings;
+require SVN::Core;
+use vars qw/$_no_auth_cache $_username/;
+
+sub simple {
+ my ($cred, $realm, $default_username, $may_save, $pool) = @_;
+ $may_save = undef if $_no_auth_cache;
+ $default_username = $_username if defined $_username;
+ if (defined $default_username && length $default_username) {
+ if (defined $realm && length $realm) {
+ print STDERR "Authentication realm: $realm\n";
+ STDERR->flush;
+ }
+ $cred->username($default_username);
+ } else {
+ username($cred, $realm, $may_save, $pool);
+ }
+ $cred->password(_read_password("Password for '" .
+ $cred->username . "': ", $realm));
+ $cred->may_save($may_save);
+ $SVN::_Core::SVN_NO_ERROR;
+}
+
+sub ssl_server_trust {
+ my ($cred, $realm, $failures, $cert_info, $may_save, $pool) = @_;
+ $may_save = undef if $_no_auth_cache;
+ print STDERR "Error validating server certificate for '$realm':\n";
+ {
+ no warnings 'once';
+ # All variables SVN::Auth::SSL::* are used only once,
+ # so we're shutting up Perl warnings about this.
+ if ($failures & $SVN::Auth::SSL::UNKNOWNCA) {
+ print STDERR " - The certificate is not issued ",
+ "by a trusted authority. Use the\n",
+ " fingerprint to validate ",
+ "the certificate manually!\n";
+ }
+ if ($failures & $SVN::Auth::SSL::CNMISMATCH) {
+ print STDERR " - The certificate hostname ",
+ "does not match.\n";
+ }
+ if ($failures & $SVN::Auth::SSL::NOTYETVALID) {
+ print STDERR " - The certificate is not yet valid.\n";
+ }
+ if ($failures & $SVN::Auth::SSL::EXPIRED) {
+ print STDERR " - The certificate has expired.\n";
+ }
+ if ($failures & $SVN::Auth::SSL::OTHER) {
+ print STDERR " - The certificate has ",
+ "an unknown error.\n";
+ }
+ } # no warnings 'once'
+ printf STDERR
+ "Certificate information:\n".
+ " - Hostname: %s\n".
+ " - Valid: from %s until %s\n".
+ " - Issuer: %s\n".
+ " - Fingerprint: %s\n",
+ map $cert_info->$_, qw(hostname valid_from valid_until
+ issuer_dname fingerprint);
+ my $choice;
+prompt:
+ my $options = $may_save ?
+ "(R)eject, accept (t)emporarily or accept (p)ermanently? " :
+ "(R)eject or accept (t)emporarily? ";
+ STDERR->flush;
+ $choice = lc(substr(Git::prompt("Certificate problem.\n" . $options) || 'R', 0, 1));
+ if ($choice eq 't') {
+ $cred->may_save(undef);
+ } elsif ($choice eq 'r') {
+ return -1;
+ } elsif ($may_save && $choice eq 'p') {
+ $cred->may_save($may_save);
+ } else {
+ goto prompt;
+ }
+ $cred->accepted_failures($failures);
+ $SVN::_Core::SVN_NO_ERROR;
+}
+
+sub ssl_client_cert {
+ my ($cred, $realm, $may_save, $pool) = @_;
+ $may_save = undef if $_no_auth_cache;
+ print STDERR "Client certificate filename: ";
+ STDERR->flush;
+ chomp(my $filename = <STDIN>);
+ $cred->cert_file($filename);
+ $cred->may_save($may_save);
+ $SVN::_Core::SVN_NO_ERROR;
+}
+
+sub ssl_client_cert_pw {
+ my ($cred, $realm, $may_save, $pool) = @_;
+ $may_save = undef if $_no_auth_cache;
+ $cred->password(_read_password("Password: ", $realm));
+ $cred->may_save($may_save);
+ $SVN::_Core::SVN_NO_ERROR;
+}
+
+sub username {
+ my ($cred, $realm, $may_save, $pool) = @_;
+ $may_save = undef if $_no_auth_cache;
+ if (defined $realm && length $realm) {
+ print STDERR "Authentication realm: $realm\n";
+ }
+ my $username;
+ if (defined $_username) {
+ $username = $_username;
+ } else {
+ $username = Git::prompt("Username: ");
+ }
+ $cred->username($username);
+ $cred->may_save($may_save);
+ $SVN::_Core::SVN_NO_ERROR;
+}
+
+sub _read_password {
+ my ($prompt, $realm) = @_;
+ my $password = Git::prompt($prompt, 1);
+ $password;
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Git::SVN::Prompt - authentication callbacks for git-svn
+
+=head1 SYNOPSIS
+
+ use Git::SVN::Prompt qw(simple ssl_client_cert ssl_client_cert_pw
+ ssl_server_trust username);
+ use SVN::Client ();
+
+ my $cached_simple = SVN::Client::get_simple_provider();
+ my $git_simple = SVN::Client::get_simple_prompt_provider(\&simple, 2);
+ my $cached_ssl = SVN::Client::get_ssl_server_trust_file_provider();
+ my $git_ssl = SVN::Client::get_ssl_server_trust_prompt_provider(
+ \&ssl_server_trust);
+ my $cached_cert = SVN::Client::get_ssl_client_cert_file_provider();
+ my $git_cert = SVN::Client::get_ssl_client_cert_prompt_provider(
+ \&ssl_client_cert, 2);
+ my $cached_cert_pw = SVN::Client::get_ssl_client_cert_pw_file_provider();
+ my $git_cert_pw = SVN::Client::get_ssl_client_cert_pw_prompt_provider(
+ \&ssl_client_cert_pw, 2);
+ my $cached_username = SVN::Client::get_username_provider();
+ my $git_username = SVN::Client::get_username_prompt_provider(
+ \&username, 2);
+
+ my $ctx = new SVN::Client(
+ auth => [
+ $cached_simple, $git_simple,
+ $cached_ssl, $git_ssl,
+ $cached_cert, $git_cert,
+ $cached_cert_pw, $git_cert_pw,
+ $cached_username, $git_username
+ ]);
+
+=head1 DESCRIPTION
+
+This module is an implementation detail of the "git svn" command.
+It implements git-svn's authentication policy. Do not use it unless
+you are developing git-svn.
+
+The interface will change as git-svn evolves.
+
+=head1 DEPENDENCIES
+
+L<SVN::Core>.
+
+=head1 SEE ALSO
+
+L<SVN::Client>.
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+=head1 BUGS
+
+None.
diff --git a/perl/Git/SVN/Ra.pm b/perl/Git/SVN/Ra.pm
new file mode 100644
index 0000000000..e764696801
--- /dev/null
+++ b/perl/Git/SVN/Ra.pm
@@ -0,0 +1,708 @@
+package Git::SVN::Ra;
+use vars qw/@ISA $config_dir $_ignore_refs_regex $_log_window_size/;
+use strict;
+use warnings;
+use Memoize;
+use Git::SVN::Utils qw(
+ canonicalize_url
+ canonicalize_path
+ add_path_to_url
+);
+
+use SVN::Ra;
+BEGIN {
+ @ISA = qw(SVN::Ra);
+}
+
+my ($ra_invalid, $can_do_switch, %ignored_err, $RA);
+
+BEGIN {
+ # enforce temporary pool usage for some simple functions
+ no strict 'refs';
+ for my $f (qw/rev_proplist get_latest_revnum get_uuid get_repos_root
+ get_file/) {
+ my $SUPER = "SUPER::$f";
+ *$f = sub {
+ my $self = shift;
+ my $pool = SVN::Pool->new;
+ my @ret = $self->$SUPER(@_,$pool);
+ $pool->clear;
+ wantarray ? @ret : $ret[0];
+ };
+ }
+}
+
+# serf has a bug that leads to a coredump upon termination if the
+# remote access object is left around (not fixed yet in serf 1.3.1).
+# Explicitly free it to work around the issue.
+END {
+ $RA = undef;
+ $ra_invalid = 1;
+}
+
+sub _auth_providers () {
+ require SVN::Client;
+ my @rv = (
+ SVN::Client::get_simple_provider(),
+ SVN::Client::get_ssl_server_trust_file_provider(),
+ SVN::Client::get_simple_prompt_provider(
+ \&Git::SVN::Prompt::simple, 2),
+ SVN::Client::get_ssl_client_cert_file_provider(),
+ SVN::Client::get_ssl_client_cert_prompt_provider(
+ \&Git::SVN::Prompt::ssl_client_cert, 2),
+ SVN::Client::get_ssl_client_cert_pw_file_provider(),
+ SVN::Client::get_ssl_client_cert_pw_prompt_provider(
+ \&Git::SVN::Prompt::ssl_client_cert_pw, 2),
+ SVN::Client::get_username_provider(),
+ SVN::Client::get_ssl_server_trust_prompt_provider(
+ \&Git::SVN::Prompt::ssl_server_trust),
+ SVN::Client::get_username_prompt_provider(
+ \&Git::SVN::Prompt::username, 2)
+ );
+
+ # earlier 1.6.x versions would segfault, and <= 1.5.x didn't have
+ # this function
+ if (::compare_svn_version('1.6.15') >= 0) {
+ my $config = SVN::Core::config_get_config($config_dir);
+ my ($p, @a);
+ # config_get_config returns all config files from
+ # ~/.subversion, auth_get_platform_specific_client_providers
+ # just wants the config "file".
+ @a = ($config->{'config'}, undef);
+ $p = SVN::Core::auth_get_platform_specific_client_providers(@a);
+ # Insert the return value from
+ # auth_get_platform_specific_providers
+ unshift @rv, @$p;
+ }
+ \@rv;
+}
+
+sub prepare_config_once {
+ SVN::_Core::svn_config_ensure($config_dir, undef);
+ my ($baton, $callbacks) = SVN::Core::auth_open_helper(_auth_providers);
+ my $config = SVN::Core::config_get_config($config_dir);
+ my $conf_t = $config->{'config'};
+
+ no warnings 'once';
+ # The usage of $SVN::_Core::SVN_CONFIG_* variables
+ # produces warnings that variables are used only once.
+ # I had not found the better way to shut them up, so
+ # the warnings of type 'once' are disabled in this block.
+ if (SVN::_Core::svn_config_get_bool($conf_t,
+ $SVN::_Core::SVN_CONFIG_SECTION_AUTH,
+ $SVN::_Core::SVN_CONFIG_OPTION_STORE_PASSWORDS,
+ 1) == 0) {
+ my $val = '1';
+ if (::compare_svn_version('1.9.0') < 0) { # pre-SVN r1553823
+ my $dont_store_passwords = 1;
+ $val = bless \$dont_store_passwords, "_p_void";
+ }
+ SVN::_Core::svn_auth_set_parameter($baton,
+ $SVN::_Core::SVN_AUTH_PARAM_DONT_STORE_PASSWORDS,
+ $val);
+ }
+ if (SVN::_Core::svn_config_get_bool($conf_t,
+ $SVN::_Core::SVN_CONFIG_SECTION_AUTH,
+ $SVN::_Core::SVN_CONFIG_OPTION_STORE_AUTH_CREDS,
+ 1) == 0) {
+ $Git::SVN::Prompt::_no_auth_cache = 1;
+ }
+
+ return ($config, $baton, $callbacks);
+} # no warnings 'once'
+
+INIT {
+ Memoize::memoize '_auth_providers';
+ Memoize::memoize 'prepare_config_once';
+}
+
+sub new {
+ my ($class, $url) = @_;
+ $url = canonicalize_url($url);
+ return $RA if ($RA && $RA->url eq $url);
+
+ ::_req_svn();
+
+ $RA = undef;
+ my ($config, $baton, $callbacks) = prepare_config_once();
+ my $self = SVN::Ra->new(url => $url, auth => $baton,
+ config => $config,
+ pool => SVN::Pool->new,
+ auth_provider_callbacks => $callbacks);
+ $RA = bless $self, $class;
+
+ # Make sure its canonicalized
+ $self->url($url);
+ $self->{svn_path} = $url;
+ $self->{repos_root} = $self->get_repos_root;
+ $self->{svn_path} =~ s#^\Q$self->{repos_root}\E(/|$)##;
+ $self->{cache} = { check_path => { r => 0, data => {} },
+ get_dir => { r => 0, data => {} } };
+
+ return $RA;
+}
+
+sub url {
+ my $self = shift;
+
+ if (@_) {
+ my $url = shift;
+ $self->{url} = canonicalize_url($url);
+ return;
+ }
+
+ return $self->{url};
+}
+
+sub check_path {
+ my ($self, $path, $r) = @_;
+ my $cache = $self->{cache}->{check_path};
+ if ($r == $cache->{r} && exists $cache->{data}->{$path}) {
+ return $cache->{data}->{$path};
+ }
+ my $pool = SVN::Pool->new;
+ my $t = $self->SUPER::check_path($path, $r, $pool);
+ $pool->clear;
+ if ($r != $cache->{r}) {
+ %{$cache->{data}} = ();
+ $cache->{r} = $r;
+ }
+ $cache->{data}->{$path} = $t;
+}
+
+sub get_dir {
+ my ($self, $dir, $r) = @_;
+ my $cache = $self->{cache}->{get_dir};
+ if ($r == $cache->{r}) {
+ if (my $x = $cache->{data}->{$dir}) {
+ return wantarray ? @$x : $x->[0];
+ }
+ }
+ my $pool = SVN::Pool->new;
+ my ($d, undef, $props);
+
+ if (::compare_svn_version('1.4.0') >= 0) {
+ # n.b. in addition to being potentially more efficient,
+ # this works around what appears to be a bug in some
+ # SVN 1.8 versions
+ my $kind = 1; # SVN_DIRENT_KIND
+ ($d, undef, $props) = $self->get_dir2($dir, $r, $kind, $pool);
+ } else {
+ ($d, undef, $props) = $self->SUPER::get_dir($dir, $r, $pool);
+ }
+ my %dirents = map { $_ => { kind => $d->{$_}->kind } } keys %$d;
+ $pool->clear;
+ if ($r != $cache->{r}) {
+ %{$cache->{data}} = ();
+ $cache->{r} = $r;
+ }
+ $cache->{data}->{$dir} = [ \%dirents, $r, $props ];
+ wantarray ? (\%dirents, $r, $props) : \%dirents;
+}
+
+# get_log(paths, start, end, limit,
+# discover_changed_paths, strict_node_history, receiver)
+sub get_log {
+ my ($self, @args) = @_;
+ my $pool = SVN::Pool->new;
+
+ # svn_log_changed_path_t objects passed to get_log are likely to be
+ # overwritten even if only the refs are copied to an external variable,
+ # so we should dup the structures in their entirety. Using an
+ # externally passed pool (instead of our temporary and quickly cleared
+ # pool in Git::SVN::Ra) does not help matters at all...
+ my $receiver = pop @args;
+ my $prefix = "/".$self->{svn_path};
+ $prefix =~ s#/+($)##;
+ my $prefix_regex = qr#^\Q$prefix\E#;
+ push(@args, sub {
+ my ($paths) = $_[0];
+ return &$receiver(@_) unless $paths;
+ $_[0] = ();
+ foreach my $p (keys %$paths) {
+ my $i = $paths->{$p};
+ # Make path relative to our url, not repos_root
+ $p =~ s/$prefix_regex//;
+ my %s = map { $_ => $i->$_; }
+ qw/copyfrom_path copyfrom_rev action/;
+ if ($s{'copyfrom_path'}) {
+ $s{'copyfrom_path'} =~ s/$prefix_regex//;
+ $s{'copyfrom_path'} = canonicalize_path($s{'copyfrom_path'});
+ }
+ $_[0]{$p} = \%s;
+ }
+ &$receiver(@_);
+ });
+
+
+ # the limit parameter was not supported in SVN 1.1.x, so we
+ # drop it. Therefore, the receiver callback passed to it
+ # is made aware of this limitation by being wrapped if
+ # the limit passed to is being wrapped.
+ if (::compare_svn_version('1.2.0') <= 0) {
+ my $limit = splice(@args, 3, 1);
+ if ($limit > 0) {
+ my $receiver = pop @args;
+ push(@args, sub { &$receiver(@_) if (--$limit >= 0) });
+ }
+ }
+ my $ret = $self->SUPER::get_log(@args, $pool);
+ $pool->clear;
+ $ret;
+}
+
+# uncommon, only for ancient SVN (<= 1.4.2)
+sub trees_match {
+ require IO::File;
+ require SVN::Client;
+ my ($self, $url1, $rev1, $url2, $rev2) = @_;
+ my $ctx = SVN::Client->new(auth => _auth_providers);
+ my $out = IO::File->new_tmpfile;
+
+ # older SVN (1.1.x) doesn't take $pool as the last parameter for
+ # $ctx->diff(), so we'll create a default one
+ my $pool = SVN::Pool->new_default_sub;
+
+ $ra_invalid = 1; # this will open a new SVN::Ra connection to $url1
+ $ctx->diff([], $url1, $rev1, $url2, $rev2, 1, 1, 0, $out, $out);
+ $out->flush;
+ my $ret = (($out->stat)[7] == 0);
+ close $out or croak $!;
+
+ $ret;
+}
+
+sub get_commit_editor {
+ my ($self, $log, $cb, $pool) = @_;
+
+ my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef, 0) : ();
+ $self->SUPER::get_commit_editor($log, $cb, @lock, $pool);
+}
+
+sub gs_do_update {
+ my ($self, $rev_a, $rev_b, $gs, $editor) = @_;
+ my $new = ($rev_a == $rev_b);
+ my $path = $gs->path;
+
+ if ($new && -e $gs->{index}) {
+ unlink $gs->{index} or die
+ "Couldn't unlink index: $gs->{index}: $!\n";
+ }
+ my $pool = SVN::Pool->new;
+ $editor->set_path_strip($path);
+ my (@pc) = split m#/#, $path;
+ my $reporter = $self->do_update($rev_b, (@pc ? shift @pc : ''),
+ 1, $editor, $pool);
+ my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();
+
+ # Since we can't rely on svn_ra_reparent being available, we'll
+ # just have to do some magic with set_path to make it so
+ # we only want a partial path.
+ my $sp = '';
+ my $final = join('/', @pc);
+ while (@pc) {
+ $reporter->set_path($sp, $rev_b, 0, @lock, $pool);
+ $sp .= '/' if length $sp;
+ $sp .= shift @pc;
+ }
+ die "BUG: '$sp' != '$final'\n" if ($sp ne $final);
+
+ $reporter->set_path($sp, $rev_a, $new, @lock, $pool);
+
+ $reporter->finish_report($pool);
+ $pool->clear;
+ $editor->{git_commit_ok};
+}
+
+# this requires SVN 1.4.3 or later (do_switch didn't work before 1.4.3, and
+# svn_ra_reparent didn't work before 1.4)
+sub gs_do_switch {
+ my ($self, $rev_a, $rev_b, $gs, $url_b, $editor) = @_;
+ my $path = $gs->path;
+ my $pool = SVN::Pool->new;
+
+ my $old_url = $self->url;
+ my $full_url = add_path_to_url( $self->url, $path );
+ my ($ra, $reparented);
+
+ if ($old_url =~ m#^svn(\+\w+)?://# ||
+ ($full_url =~ m#^https?://# &&
+ canonicalize_url($full_url) ne $full_url)) {
+ $_[0] = undef;
+ $self = undef;
+ $RA = undef;
+ $ra = Git::SVN::Ra->new($full_url);
+ $ra_invalid = 1;
+ } elsif ($old_url ne $full_url) {
+ SVN::_Ra::svn_ra_reparent(
+ $self->{session},
+ canonicalize_url($full_url),
+ $pool
+ );
+ $self->url($full_url);
+ $reparented = 1;
+ }
+
+ $ra ||= $self;
+ $url_b = canonicalize_url($url_b);
+ my $reporter = $ra->do_switch($rev_b, '', 1, $url_b, $editor, $pool);
+ my @lock = (::compare_svn_version('1.2.0') >= 0) ? (undef) : ();
+ $reporter->set_path('', $rev_a, 0, @lock, $pool);
+ $reporter->finish_report($pool);
+
+ if ($reparented) {
+ SVN::_Ra::svn_ra_reparent($self->{session}, $old_url, $pool);
+ $self->url($old_url);
+ }
+
+ $pool->clear;
+ $editor->{git_commit_ok};
+}
+
+sub longest_common_path {
+ my ($gsv, $globs) = @_;
+ my %common;
+ my $common_max = scalar @$gsv;
+
+ foreach my $gs (@$gsv) {
+ my @tmp = split m#/#, $gs->path;
+ my $p = '';
+ foreach (@tmp) {
+ $p .= length($p) ? "/$_" : $_;
+ $common{$p} ||= 0;
+ $common{$p}++;
+ }
+ }
+ $globs ||= [];
+ $common_max += scalar @$globs;
+ foreach my $glob (@$globs) {
+ my @tmp = split m#/#, $glob->{path}->{left};
+ my $p = '';
+ foreach (@tmp) {
+ $p .= length($p) ? "/$_" : $_;
+ $common{$p} ||= 0;
+ $common{$p}++;
+ }
+ }
+
+ my $longest_path = '';
+ foreach (sort {length $b <=> length $a} keys %common) {
+ if ($common{$_} == $common_max) {
+ $longest_path = $_;
+ last;
+ }
+ }
+ $longest_path;
+}
+
+sub gs_fetch_loop_common {
+ my ($self, $base, $head, $gsv, $globs) = @_;
+ return if ($base > $head);
+ # Make sure the cat_blob open2 FileHandle is created before calling
+ # SVN::Pool::new_default so that it does not incorrectly end up in the pool.
+ $::_repository->_open_cat_blob_if_needed;
+ my $gpool = SVN::Pool->new_default;
+ my $ra_url = $self->url;
+ my $reload_ra = sub {
+ $_[0] = undef;
+ $self = undef;
+ $RA = undef;
+ $gpool->clear;
+ $self = Git::SVN::Ra->new($ra_url);
+ $ra_invalid = undef;
+ };
+ my $inc = $_log_window_size;
+ my ($min, $max) = ($base, $head < $base + $inc ? $head : $base + $inc);
+ my $longest_path = longest_common_path($gsv, $globs);
+ my $find_trailing_edge;
+ while (1) {
+ my %revs;
+ my $err;
+ my $err_handler = $SVN::Error::handler;
+ $SVN::Error::handler = sub {
+ ($err) = @_;
+ skip_unknown_revs($err);
+ };
+ sub _cb {
+ my ($paths, $r, $author, $date, $log) = @_;
+ [ $paths,
+ { author => $author, date => $date, log => $log } ];
+ }
+ $self->get_log([$longest_path], $min, $max, 0, 1, 1,
+ sub { $revs{$_[1]} = _cb(@_) });
+ if ($err) {
+ print "Checked through r$max\r";
+ } else {
+ $find_trailing_edge = 1;
+ }
+ if ($err and $find_trailing_edge) {
+ print STDERR "Path '$longest_path' ",
+ "was probably deleted:\n",
+ $err->expanded_message,
+ "\nWill attempt to follow ",
+ "revisions r$min .. r$max ",
+ "committed before the deletion\n";
+ my $hi = $max;
+ while (--$hi >= $min) {
+ my $ok;
+ $self->get_log([$longest_path], $min, $hi,
+ 0, 1, 1, sub {
+ $ok = $_[1];
+ $revs{$_[1]} = _cb(@_) });
+ if ($ok) {
+ print STDERR "r$min .. r$ok OK\n";
+ last;
+ }
+ }
+ $find_trailing_edge = 0;
+ }
+ $SVN::Error::handler = $err_handler;
+
+ my %exists = map { $_->path => $_ } @$gsv;
+ foreach my $r (sort {$a <=> $b} keys %revs) {
+ my ($paths, $logged) = @{delete $revs{$r}};
+
+ foreach my $gs ($self->match_globs(\%exists, $paths,
+ $globs, $r)) {
+ if ($gs->rev_map_max >= $r) {
+ next;
+ }
+ next unless $gs->match_paths($paths, $r);
+ $gs->{logged_rev_props} = $logged;
+ if (my $last_commit = $gs->last_commit) {
+ $gs->assert_index_clean($last_commit);
+ }
+ my $log_entry = $gs->do_fetch($paths, $r);
+ if ($log_entry) {
+ $gs->do_git_commit($log_entry);
+ }
+ $Git::SVN::INDEX_FILES{$gs->{index}} = 1;
+ }
+ foreach my $g (@$globs) {
+ my $k = "svn-remote.$g->{remote}." .
+ "$g->{t}-maxRev";
+ Git::SVN::tmp_config($k, $r);
+ }
+ $reload_ra->() if $ra_invalid;
+ }
+ # pre-fill the .rev_db since it'll eventually get filled in
+ # with '0' x40 if something new gets committed
+ foreach my $gs (@$gsv) {
+ next if $gs->rev_map_max >= $max;
+ next if defined $gs->rev_map_get($max);
+ $gs->rev_map_set($max, 0 x40);
+ }
+ foreach my $g (@$globs) {
+ my $k = "svn-remote.$g->{remote}.$g->{t}-maxRev";
+ Git::SVN::tmp_config($k, $max);
+ }
+ last if $max >= $head;
+ $min = $max + 1;
+ $max += $inc;
+ $max = $head if ($max > $head);
+
+ $reload_ra->();
+ }
+ Git::SVN::gc();
+}
+
+sub get_dir_globbed {
+ my ($self, $left, $depth, $r) = @_;
+
+ my @x = eval { $self->get_dir($left, $r) };
+ return unless scalar @x == 3;
+ my $dirents = $x[0];
+ my @finalents;
+ foreach my $de (keys %$dirents) {
+ next if $dirents->{$de}->{kind} != $SVN::Node::dir;
+ if ($depth > 1) {
+ my @args = ("$left/$de", $depth - 1, $r);
+ foreach my $dir ($self->get_dir_globbed(@args)) {
+ push @finalents, "$de/$dir";
+ }
+ } else {
+ push @finalents, $de;
+ }
+ }
+ @finalents;
+}
+
+# return value: 0 -- don't ignore, 1 -- ignore
+sub is_ref_ignored {
+ my ($g, $p) = @_;
+ my $refname = $g->{ref}->full_path($p);
+ return 1 if defined($g->{ignore_refs_regex}) &&
+ $refname =~ m!$g->{ignore_refs_regex}!;
+ return 0 unless defined($_ignore_refs_regex);
+ return 1 if $refname =~ m!$_ignore_refs_regex!o;
+ return 0;
+}
+
+sub match_globs {
+ my ($self, $exists, $paths, $globs, $r) = @_;
+
+ sub get_dir_check {
+ my ($self, $exists, $g, $r) = @_;
+
+ my @dirs = $self->get_dir_globbed($g->{path}->{left},
+ $g->{path}->{depth},
+ $r);
+
+ foreach my $de (@dirs) {
+ my $p = $g->{path}->full_path($de);
+ next if $exists->{$p};
+ next if (length $g->{path}->{right} &&
+ ($self->check_path($p, $r) !=
+ $SVN::Node::dir));
+ next unless $p =~ /$g->{path}->{regex}/;
+ $exists->{$p} = Git::SVN->init($self->url, $p, undef,
+ $g->{ref}->full_path($de), 1);
+ }
+ }
+ foreach my $g (@$globs) {
+ if (my $path = $paths->{"/$g->{path}->{left}"}) {
+ if ($path->{action} =~ /^[AR]$/) {
+ get_dir_check($self, $exists, $g, $r);
+ }
+ }
+ foreach (keys %$paths) {
+ if (/$g->{path}->{left_regex}/ &&
+ !/$g->{path}->{regex}/) {
+ next if $paths->{$_}->{action} !~ /^[AR]$/;
+ get_dir_check($self, $exists, $g, $r);
+ }
+ next unless /$g->{path}->{regex}/;
+ my $p = $1;
+ my $pathname = $g->{path}->full_path($p);
+ next if is_ref_ignored($g, $p);
+ next if $exists->{$pathname};
+ next if ($self->check_path($pathname, $r) !=
+ $SVN::Node::dir);
+ $exists->{$pathname} = Git::SVN->init(
+ $self->url, $pathname, undef,
+ $g->{ref}->full_path($p), 1);
+ }
+ my $c = '';
+ foreach (split m#/#, $g->{path}->{left}) {
+ $c .= "/$_";
+ next unless ($paths->{$c} &&
+ ($paths->{$c}->{action} =~ /^[AR]$/));
+ get_dir_check($self, $exists, $g, $r);
+ }
+ }
+ values %$exists;
+}
+
+sub minimize_url {
+ my ($self) = @_;
+ return $self->url if ($self->url eq $self->{repos_root});
+ my $url = $self->{repos_root};
+ my @components = split(m!/!, $self->{svn_path});
+ my $c = '';
+ do {
+ $url = add_path_to_url($url, $c);
+ eval {
+ my $ra = (ref $self)->new($url);
+ my $latest = $ra->get_latest_revnum;
+ $ra->get_log("", $latest, 0, 1, 0, 1, sub {});
+ };
+ } while ($@ && ($c = shift @components));
+
+ return canonicalize_url($url);
+}
+
+sub can_do_switch {
+ my $self = shift;
+ unless (defined $can_do_switch) {
+ my $pool = SVN::Pool->new;
+ my $rep = eval {
+ $self->do_switch(1, '', 0, $self->url,
+ SVN::Delta::Editor->new, $pool);
+ };
+ if ($@) {
+ $can_do_switch = 0;
+ } else {
+ $rep->abort_report($pool);
+ $can_do_switch = 1;
+ }
+ $pool->clear;
+ }
+ $can_do_switch;
+}
+
+sub skip_unknown_revs {
+ my ($err) = @_;
+ my $errno = $err->apr_err();
+ # Maybe the branch we're tracking didn't
+ # exist when the repo started, so it's
+ # not an error if it doesn't, just continue
+ #
+ # Wonderfully consistent library, eh?
+ # 160013 - svn:// and file://
+ # 175002 - http(s)://
+ # 175007 - http(s):// (this repo required authorization, too...)
+ # More codes may be discovered later...
+ if ($errno == 175007 || $errno == 175002 || $errno == 160013) {
+ my $err_key = $err->expanded_message;
+ # revision numbers change every time, filter them out
+ $err_key =~ s/\d+/\0/g;
+ $err_key = "$errno\0$err_key";
+ unless ($ignored_err{$err_key}) {
+ warn "W: Ignoring error from SVN, path probably ",
+ "does not exist: ($errno): ",
+ $err->expanded_message,"\n";
+ warn "W: Do not be alarmed at the above message ",
+ "git-svn is just searching aggressively for ",
+ "old history.\n",
+ "This may take a while on large repositories\n";
+ $ignored_err{$err_key} = 1;
+ }
+ return;
+ }
+ die "Error from SVN, ($errno): ", $err->expanded_message,"\n";
+}
+
+1;
+__END__
+
+=head1 NAME
+
+Git::SVN::Ra - Subversion remote access functions for git-svn
+
+=head1 SYNOPSIS
+
+ use Git::SVN::Ra;
+
+ my $ra = Git::SVN::Ra->new($branchurl);
+ my ($dirents, $fetched_revnum, $props) =
+ $ra->get_dir('.', $SVN::Core::INVALID_REVNUM);
+
+=head1 DESCRIPTION
+
+This is a wrapper around the L<SVN::Ra> module for use by B<git-svn>.
+It fills in some default parameters (such as the authentication
+scheme), smooths over incompatibilities between libsvn versions, adds
+caching, and implements some functions specific to B<git-svn>.
+
+Do not use it unless you are developing git-svn. The interface will
+change as git-svn evolves.
+
+=head1 DEPENDENCIES
+
+Subversion perl bindings,
+L<Git::SVN>.
+
+C<Git::SVN::Ra> has not been tested using callers other than
+B<git-svn> itself.
+
+=head1 SEE ALSO
+
+L<SVN::Ra>.
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+=head1 BUGS
+
+None.
diff --git a/perl/Git/SVN/Utils.pm b/perl/Git/SVN/Utils.pm
new file mode 100644
index 0000000000..3d1a0933a2
--- /dev/null
+++ b/perl/Git/SVN/Utils.pm
@@ -0,0 +1,232 @@
+package Git::SVN::Utils;
+
+use strict;
+use warnings;
+
+use SVN::Core;
+
+use base qw(Exporter);
+
+our @EXPORT_OK = qw(
+ fatal
+ can_compress
+ canonicalize_path
+ canonicalize_url
+ join_paths
+ add_path_to_url
+);
+
+
+=head1 NAME
+
+Git::SVN::Utils - utility functions used across Git::SVN
+
+=head1 SYNOPSIS
+
+ use Git::SVN::Utils qw(functions to import);
+
+=head1 DESCRIPTION
+
+This module contains functions which are useful across many different
+parts of Git::SVN. Mostly it's a place to put utility functions
+rather than duplicate the code or have classes grabbing at other
+classes.
+
+=head1 FUNCTIONS
+
+All functions can be imported only on request.
+
+=head3 fatal
+
+ fatal(@message);
+
+Display a message and exit with a fatal error code.
+
+=cut
+
+# Note: not certain why this is in use instead of die. Probably because
+# the exit code of die is 255? Doesn't appear to be used consistently.
+sub fatal (@) { print STDERR "@_\n"; exit 1 }
+
+
+=head3 can_compress
+
+ my $can_compress = can_compress;
+
+Returns true if Compress::Zlib is available, false otherwise.
+
+=cut
+
+my $can_compress;
+sub can_compress {
+ return $can_compress if defined $can_compress;
+
+ return $can_compress = eval { require Compress::Zlib; };
+}
+
+
+=head3 canonicalize_path
+
+ my $canoncalized_path = canonicalize_path($path);
+
+Converts $path into a canonical form which is safe to pass to the SVN
+API as a file path.
+
+=cut
+
+# Turn foo/../bar into bar
+sub _collapse_dotdot {
+ my $path = shift;
+
+ 1 while $path =~ s{/[^/]+/+\.\.}{};
+ 1 while $path =~ s{[^/]+/+\.\./}{};
+ 1 while $path =~ s{[^/]+/+\.\.}{};
+
+ return $path;
+}
+
+
+sub canonicalize_path {
+ my $path = shift;
+ my $rv;
+
+ # The 1.7 way to do it
+ if ( defined &SVN::_Core::svn_dirent_canonicalize ) {
+ $path = _collapse_dotdot($path);
+ $rv = SVN::_Core::svn_dirent_canonicalize($path);
+ }
+ # The 1.6 way to do it
+ # This can return undef on subversion-perl-1.4.2-2.el5 (CentOS 5.2)
+ elsif ( defined &SVN::_Core::svn_path_canonicalize ) {
+ $path = _collapse_dotdot($path);
+ $rv = SVN::_Core::svn_path_canonicalize($path);
+ }
+
+ return $rv if defined $rv;
+
+ # No SVN API canonicalization is available, or the SVN API
+ # didn't return a successful result, do it ourselves
+ return _canonicalize_path_ourselves($path);
+}
+
+
+sub _canonicalize_path_ourselves {
+ my ($path) = @_;
+ my $dot_slash_added = 0;
+ if (substr($path, 0, 1) ne "/") {
+ $path = "./" . $path;
+ $dot_slash_added = 1;
+ }
+ $path =~ s#/+#/#g;
+ $path =~ s#/\.(?:/|$)#/#g;
+ $path = _collapse_dotdot($path);
+ $path =~ s#/$##g;
+ $path =~ s#^\./## if $dot_slash_added;
+ $path =~ s#^\.$##;
+ return $path;
+}
+
+
+=head3 canonicalize_url
+
+ my $canonicalized_url = canonicalize_url($url);
+
+Converts $url into a canonical form which is safe to pass to the SVN
+API as a URL.
+
+=cut
+
+sub canonicalize_url {
+ my $url = shift;
+
+ # The 1.7 way to do it
+ if ( defined &SVN::_Core::svn_uri_canonicalize ) {
+ return SVN::_Core::svn_uri_canonicalize($url);
+ }
+ # There wasn't a 1.6 way to do it, so we do it ourself.
+ else {
+ return _canonicalize_url_ourselves($url);
+ }
+}
+
+
+sub _canonicalize_url_path {
+ my ($uri_path) = @_;
+
+ my @parts;
+ foreach my $part (split m{/+}, $uri_path) {
+ $part =~ s/([^!\$%&'()*+,.\/\w:=\@_`~-]|%(?![a-fA-F0-9]{2}))/sprintf("%%%02X",ord($1))/eg;
+ push @parts, $part;
+ }
+
+ return join('/', @parts);
+}
+
+sub _canonicalize_url_ourselves {
+ my ($url) = @_;
+ if ($url =~ m#^([^:]+)://([^/]*)(.*)$#) {
+ my ($scheme, $domain, $uri) = ($1, $2, _canonicalize_url_path(canonicalize_path($3)));
+ $url = "$scheme://$domain$uri";
+ }
+ $url;
+}
+
+
+=head3 join_paths
+
+ my $new_path = join_paths(@paths);
+
+Appends @paths together into a single path. Any empty paths are ignored.
+
+=cut
+
+sub join_paths {
+ my @paths = @_;
+
+ @paths = grep { defined $_ && length $_ } @paths;
+
+ return '' unless @paths;
+ return $paths[0] if @paths == 1;
+
+ my $new_path = shift @paths;
+ $new_path =~ s{/+$}{};
+
+ my $last_path = pop @paths;
+ $last_path =~ s{^/+}{};
+
+ for my $path (@paths) {
+ $path =~ s{^/+}{};
+ $path =~ s{/+$}{};
+ $new_path .= "/$path";
+ }
+
+ return $new_path .= "/$last_path";
+}
+
+
+=head3 add_path_to_url
+
+ my $new_url = add_path_to_url($url, $path);
+
+Appends $path onto the $url. If $path is empty, $url is returned unchanged.
+
+=cut
+
+sub add_path_to_url {
+ my($url, $path) = @_;
+
+ return $url if !defined $path or !length $path;
+
+ # Strip trailing and leading slashes so we don't
+ # wind up with http://x.com///path
+ $url =~ s{/+$}{};
+ $path =~ s{^/+}{};
+
+ # If a path has a % in it, URI escape it so it's not
+ # mistaken for a URI escape later.
+ $path =~ s{%}{%25}g;
+
+ return join '/', $url, $path;
+}
+
+1;