summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rwxr-xr-xgit-svn.perl507
-rw-r--r--perl/Git/SVN/Fetcher.pm602
-rw-r--r--perl/Makefile.PL1
3 files changed, 605 insertions, 505 deletions
diff --git a/git-svn.perl b/git-svn.perl
index c49b074102..7870cc15c1 100755
--- a/git-svn.perl
+++ b/git-svn.perl
@@ -69,7 +69,6 @@ sub _req_svn {
my $can_compress = eval { require Compress::Zlib; 1};
push @Git::SVN::Ra::ISA, 'SVN::Ra';
push @Git::SVN::Editor::ISA, 'SVN::Delta::Editor';
-push @Git::SVN::Fetcher::ISA, 'SVN::Delta::Editor';
use Carp qw/croak/;
use Digest::MD5;
use IO::File qw//;
@@ -80,6 +79,7 @@ use File::Find;
use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/;
use IPC::Open3;
use Git;
+use Git::SVN::Fetcher qw//;
use Git::SVN::Prompt qw//;
use Memoize; # core since 5.8.0, Jul 2002
@@ -89,7 +89,7 @@ BEGIN {
foreach (qw/command command_oneline command_noisy command_output_pipe
command_input_pipe command_close_pipe
command_bidi_pipe command_close_bidi_pipe/) {
- for my $package ( qw(Git::SVN::Editor Git::SVN::Fetcher
+ for my $package ( qw(Git::SVN::Editor
Git::SVN::Migration Git::SVN::Log Git::SVN),
__PACKAGE__) {
*{"${package}::$_"} = \&{"Git::$_"};
@@ -4328,509 +4328,6 @@ sub remove_username {
$_[0] =~ s{^([^:]*://)[^@]+@}{$1};
}
-package Git::SVN::Fetcher;
-use vars qw/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
- @deleted_gpath %added_placeholder $repo_id/;
-use strict;
-use warnings;
-use Carp qw/croak/;
-use File::Basename qw/dirname/;
-use IO::File qw//;
-
-# file baton members: path, mode_a, mode_b, pool, fh, blob, base
-sub new {
- my ($class, $git_svn, $switch_path) = @_;
- my $self = SVN::Delta::Editor->new;
- bless $self, $class;
- if (exists $git_svn->{last_commit}) {
- $self->{c} = $git_svn->{last_commit};
- $self->{empty_symlinks} =
- _mark_empty_symlinks($git_svn, $switch_path);
- }
-
- # some options are read globally, but can be overridden locally
- # per [svn-remote "..."] section. Command-line options will *NOT*
- # override options set in an [svn-remote "..."] section
- $repo_id = $git_svn->{repo_id};
- my $k = "svn-remote.$repo_id.ignore-paths";
- my $v = eval { command_oneline('config', '--get', $k) };
- $self->{ignore_regex} = $v;
-
- $k = "svn-remote.$repo_id.preserve-empty-dirs";
- $v = eval { command_oneline('config', '--get', '--bool', $k) };
- if ($v && $v eq 'true') {
- $_preserve_empty_dirs = 1;
- $k = "svn-remote.$repo_id.placeholder-filename";
- $v = eval { command_oneline('config', '--get', $k) };
- $_placeholder_filename = $v;
- }
-
- # Load the list of placeholder files added during previous invocations.
- $k = "svn-remote.$repo_id.added-placeholder";
- $v = eval { command_oneline('config', '--get-all', $k) };
- if ($_preserve_empty_dirs && $v) {
- # command() prints errors to stderr, so we only call it if
- # command_oneline() succeeded.
- my @v = command('config', '--get-all', $k);
- $added_placeholder{ dirname($_) } = $_ foreach @v;
- }
-
- $self->{empty} = {};
- $self->{dir_prop} = {};
- $self->{file_prop} = {};
- $self->{absent_dir} = {};
- $self->{absent_file} = {};
- $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
- $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
- $self;
-}
-
-# this uses the Ra object, so it must be called before do_{switch,update},
-# not inside them (when the Git::SVN::Fetcher object is passed) to
-# do_{switch,update}
-sub _mark_empty_symlinks {
- my ($git_svn, $switch_path) = @_;
- my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
- return {} if (!defined($bool)) || (defined($bool) && ! $bool);
-
- my %ret;
- my ($rev, $cmt) = $git_svn->last_rev_commit;
- return {} unless ($rev && $cmt);
-
- # allow the warning to be printed for each revision we fetch to
- # ensure the user sees it. The user can also disable the workaround
- # on the repository even while git svn is running and the next
- # revision fetched will skip this expensive function.
- my $printed_warning;
- chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
- my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
- local $/ = "\0";
- my $pfx = defined($switch_path) ? $switch_path : $git_svn->{path};
- $pfx .= '/' if length($pfx);
- while (<$ls>) {
- chomp;
- s/\A100644 blob $empty_blob\t//o or next;
- unless ($printed_warning) {
- print STDERR "Scanning for empty symlinks, ",
- "this may take a while if you have ",
- "many empty files\n",
- "You may disable this with `",
- "git config svn.brokenSymlinkWorkaround ",
- "false'.\n",
- "This may be done in a different ",
- "terminal without restarting ",
- "git svn\n";
- $printed_warning = 1;
- }
- my $path = $_;
- my (undef, $props) =
- $git_svn->ra->get_file($pfx.$path, $rev, undef);
- if ($props->{'svn:special'}) {
- $ret{$path} = 1;
- }
- }
- command_close_pipe($ls, $ctx);
- \%ret;
-}
-
-# returns true if a given path is inside a ".git" directory
-sub in_dot_git {
- $_[0] =~ m{(?:^|/)\.git(?:/|$)};
-}
-
-# return value: 0 -- don't ignore, 1 -- ignore
-sub is_path_ignored {
- my ($self, $path) = @_;
- return 1 if in_dot_git($path);
- return 1 if defined($self->{ignore_regex}) &&
- $path =~ m!$self->{ignore_regex}!;
- return 0 unless defined($_ignore_regex);
- return 1 if $path =~ m!$_ignore_regex!o;
- return 0;
-}
-
-sub set_path_strip {
- my ($self, $path) = @_;
- $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
-}
-
-sub open_root {
- { path => '' };
-}
-
-sub open_directory {
- my ($self, $path, $pb, $rev) = @_;
- { path => $path };
-}
-
-sub git_path {
- my ($self, $path) = @_;
- if (my $enc = $self->{pathnameencoding}) {
- require Encode;
- Encode::from_to($path, 'UTF-8', $enc);
- }
- if ($self->{path_strip}) {
- $path =~ s!$self->{path_strip}!! or
- die "Failed to strip path '$path' ($self->{path_strip})\n";
- }
- $path;
-}
-
-sub delete_entry {
- my ($self, $path, $rev, $pb) = @_;
- return undef if $self->is_path_ignored($path);
-
- my $gpath = $self->git_path($path);
- return undef if ($gpath eq '');
-
- # remove entire directories.
- my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
- =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
- if ($tree) {
- my ($ls, $ctx) = command_output_pipe(qw/ls-tree
- -r --name-only -z/,
- $tree);
- local $/ = "\0";
- while (<$ls>) {
- chomp;
- my $rmpath = "$gpath/$_";
- $self->{gii}->remove($rmpath);
- print "\tD\t$rmpath\n" unless $::_q;
- }
- print "\tD\t$gpath/\n" unless $::_q;
- command_close_pipe($ls, $ctx);
- } else {
- $self->{gii}->remove($gpath);
- print "\tD\t$gpath\n" unless $::_q;
- }
- # Don't add to @deleted_gpath if we're deleting a placeholder file.
- push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
- $self->{empty}->{$path} = 0;
- undef;
-}
-
-sub open_file {
- my ($self, $path, $pb, $rev) = @_;
- my ($mode, $blob);
-
- goto out if $self->is_path_ignored($path);
-
- my $gpath = $self->git_path($path);
- ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
- =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
- unless (defined $mode && defined $blob) {
- die "$path was not found in commit $self->{c} (r$rev)\n";
- }
- if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
- $mode = '120000';
- }
-out:
- { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
- pool => SVN::Pool->new, action => 'M' };
-}
-
-sub add_file {
- my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
- my $mode;
-
- if (!$self->is_path_ignored($path)) {
- my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
- delete $self->{empty}->{$dir};
- $mode = '100644';
-
- if ($added_placeholder{$dir}) {
- # Remove our placeholder file, if we created one.
- delete_entry($self, $added_placeholder{$dir})
- unless $path eq $added_placeholder{$dir};
- delete $added_placeholder{$dir}
- }
- }
-
- { path => $path, mode_a => $mode, mode_b => $mode,
- pool => SVN::Pool->new, action => 'A' };
-}
-
-sub add_directory {
- my ($self, $path, $cp_path, $cp_rev) = @_;
- goto out if $self->is_path_ignored($path);
- my $gpath = $self->git_path($path);
- if ($gpath eq '') {
- my ($ls, $ctx) = command_output_pipe(qw/ls-tree
- -r --name-only -z/,
- $self->{c});
- local $/ = "\0";
- while (<$ls>) {
- chomp;
- $self->{gii}->remove($_);
- print "\tD\t$_\n" unless $::_q;
- push @deleted_gpath, $gpath;
- }
- command_close_pipe($ls, $ctx);
- $self->{empty}->{$path} = 0;
- }
- my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
- delete $self->{empty}->{$dir};
- $self->{empty}->{$path} = 1;
-
- if ($added_placeholder{$dir}) {
- # Remove our placeholder file, if we created one.
- delete_entry($self, $added_placeholder{$dir});
- delete $added_placeholder{$dir}
- }
-
-out:
- { path => $path };
-}
-
-sub change_dir_prop {
- my ($self, $db, $prop, $value) = @_;
- return undef if $self->is_path_ignored($db->{path});
- $self->{dir_prop}->{$db->{path}} ||= {};
- $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
- undef;
-}
-
-sub absent_directory {
- my ($self, $path, $pb) = @_;
- return undef if $self->is_path_ignored($path);
- $self->{absent_dir}->{$pb->{path}} ||= [];
- push @{$self->{absent_dir}->{$pb->{path}}}, $path;
- undef;
-}
-
-sub absent_file {
- my ($self, $path, $pb) = @_;
- return undef if $self->is_path_ignored($path);
- $self->{absent_file}->{$pb->{path}} ||= [];
- push @{$self->{absent_file}->{$pb->{path}}}, $path;
- undef;
-}
-
-sub change_file_prop {
- my ($self, $fb, $prop, $value) = @_;
- return undef if $self->is_path_ignored($fb->{path});
- if ($prop eq 'svn:executable') {
- if ($fb->{mode_b} != 120000) {
- $fb->{mode_b} = defined $value ? 100755 : 100644;
- }
- } elsif ($prop eq 'svn:special') {
- $fb->{mode_b} = defined $value ? 120000 : 100644;
- } else {
- $self->{file_prop}->{$fb->{path}} ||= {};
- $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
- }
- undef;
-}
-
-sub apply_textdelta {
- my ($self, $fb, $exp) = @_;
- return undef if $self->is_path_ignored($fb->{path});
- my $fh = $::_repository->temp_acquire('svn_delta');
- # $fh gets auto-closed() by SVN::TxDelta::apply(),
- # (but $base does not,) so dup() it for reading in close_file
- open my $dup, '<&', $fh or croak $!;
- my $base = $::_repository->temp_acquire('git_blob');
-
- if ($fb->{blob}) {
- my ($base_is_link, $size);
-
- if ($fb->{mode_a} eq '120000' &&
- ! $self->{empty_symlinks}->{$fb->{path}}) {
- print $base 'link ' or die "print $!\n";
- $base_is_link = 1;
- }
- retry:
- $size = $::_repository->cat_blob($fb->{blob}, $base);
- die "Failed to read object $fb->{blob}" if ($size < 0);
-
- if (defined $exp) {
- seek $base, 0, 0 or croak $!;
- my $got = ::md5sum($base);
- if ($got ne $exp) {
- my $err = "Checksum mismatch: ".
- "$fb->{path} $fb->{blob}\n" .
- "expected: $exp\n" .
- " got: $got\n";
- if ($base_is_link) {
- warn $err,
- "Retrying... (possibly ",
- "a bad symlink from SVN)\n";
- $::_repository->temp_reset($base);
- $base_is_link = 0;
- goto retry;
- }
- die $err;
- }
- }
- }
- seek $base, 0, 0 or croak $!;
- $fb->{fh} = $fh;
- $fb->{base} = $base;
- [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
-}
-
-sub close_file {
- my ($self, $fb, $exp) = @_;
- return undef if $self->is_path_ignored($fb->{path});
-
- my $hash;
- my $path = $self->git_path($fb->{path});
- if (my $fh = $fb->{fh}) {
- if (defined $exp) {
- seek($fh, 0, 0) or croak $!;
- my $got = ::md5sum($fh);
- if ($got ne $exp) {
- die "Checksum mismatch: $path\n",
- "expected: $exp\n got: $got\n";
- }
- }
- if ($fb->{mode_b} == 120000) {
- sysseek($fh, 0, 0) or croak $!;
- my $rd = sysread($fh, my $buf, 5);
-
- if (!defined $rd) {
- croak "sysread: $!\n";
- } elsif ($rd == 0) {
- warn "$path has mode 120000",
- " but it points to nothing\n",
- "converting to an empty file with mode",
- " 100644\n";
- $fb->{mode_b} = '100644';
- } elsif ($buf ne 'link ') {
- warn "$path has mode 120000",
- " but is not a link\n";
- } else {
- my $tmp_fh = $::_repository->temp_acquire(
- 'svn_hash');
- my $res;
- while ($res = sysread($fh, my $str, 1024)) {
- my $out = syswrite($tmp_fh, $str, $res);
- defined($out) && $out == $res
- or croak("write ",
- Git::temp_path($tmp_fh),
- ": $!\n");
- }
- defined $res or croak $!;
-
- ($fh, $tmp_fh) = ($tmp_fh, $fh);
- Git::temp_release($tmp_fh, 1);
- }
- }
-
- $hash = $::_repository->hash_and_insert_object(
- Git::temp_path($fh));
- $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
-
- Git::temp_release($fb->{base}, 1);
- Git::temp_release($fh, 1);
- } else {
- $hash = $fb->{blob} or die "no blob information\n";
- }
- $fb->{pool}->clear;
- $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
- print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
- undef;
-}
-
-sub abort_edit {
- my $self = shift;
- $self->{nr} = $self->{gii}->{nr};
- delete $self->{gii};
- $self->SUPER::abort_edit(@_);
-}
-
-sub close_edit {
- my $self = shift;
-
- if ($_preserve_empty_dirs) {
- my @empty_dirs;
-
- # Any entry flagged as empty that also has an associated
- # dir_prop represents a newly created empty directory.
- foreach my $i (keys %{$self->{empty}}) {
- push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
- }
-
- # Search for directories that have become empty due subsequent
- # file deletes.
- push @empty_dirs, $self->find_empty_directories();
-
- # Finally, add a placeholder file to each empty directory.
- $self->add_placeholder_file($_) foreach (@empty_dirs);
-
- $self->stash_placeholder_list();
- }
-
- $self->{git_commit_ok} = 1;
- $self->{nr} = $self->{gii}->{nr};
- delete $self->{gii};
- $self->SUPER::close_edit(@_);
-}
-
-sub find_empty_directories {
- my ($self) = @_;
- my @empty_dirs;
- my %dirs = map { dirname($_) => 1 } @deleted_gpath;
-
- foreach my $dir (sort keys %dirs) {
- next if $dir eq ".";
-
- # If there have been any additions to this directory, there is
- # no reason to check if it is empty.
- my $skip_added = 0;
- foreach my $t (qw/dir_prop file_prop/) {
- foreach my $path (keys %{ $self->{$t} }) {
- if (exists $self->{$t}->{dirname($path)}) {
- $skip_added = 1;
- last;
- }
- }
- last if $skip_added;
- }
- next if $skip_added;
-
- # Use `git ls-tree` to get the filenames of this directory
- # that existed prior to this particular commit.
- my $ls = command('ls-tree', '-z', '--name-only',
- $self->{c}, "$dir/");
- my %files = map { $_ => 1 } split(/\0/, $ls);
-
- # Remove the filenames that were deleted during this commit.
- delete $files{$_} foreach (@deleted_gpath);
-
- # Report the directory if there are no filenames left.
- push @empty_dirs, $dir unless (scalar %files);
- }
- @empty_dirs;
-}
-
-sub add_placeholder_file {
- my ($self, $dir) = @_;
- my $path = "$dir/$_placeholder_filename";
- my $gpath = $self->git_path($path);
-
- my $fh = $::_repository->temp_acquire($gpath);
- my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
- Git::temp_release($fh, 1);
- $self->{gii}->update('100644', $hash, $gpath) or croak $!;
-
- # The directory should no longer be considered empty.
- delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
-
- # Keep track of any placeholder files we create.
- $added_placeholder{$dir} = $path;
-}
-
-sub stash_placeholder_list {
- my ($self) = @_;
- my $k = "svn-remote.$repo_id.added-placeholder";
- my $v = eval { command_oneline('config', '--get-all', $k) };
- command_noisy('config', '--unset-all', $k) if $v;
- foreach (values %added_placeholder) {
- command_noisy('config', '--add', $k, $_);
- }
-}
-
package Git::SVN::Editor;
use vars qw/@ISA $_rmdir $_cp_similarity $_find_copies_harder $_rename_limit/;
use strict;
diff --git a/perl/Git/SVN/Fetcher.pm b/perl/Git/SVN/Fetcher.pm
new file mode 100644
index 0000000000..4e9c77d757
--- /dev/null
+++ b/perl/Git/SVN/Fetcher.pm
@@ -0,0 +1,602 @@
+package Git::SVN::Fetcher;
+use vars qw/@ISA $_ignore_regex $_preserve_empty_dirs $_placeholder_filename
+ @deleted_gpath %added_placeholder $repo_id/;
+use strict;
+use warnings;
+use SVN::Delta;
+use Carp qw/croak/;
+use File::Basename qw/dirname/;
+use IO::File qw//;
+use Git qw/command command_oneline command_noisy command_output_pipe
+ command_input_pipe command_close_pipe
+ command_bidi_pipe command_close_bidi_pipe/;
+BEGIN {
+ @ISA = qw(SVN::Delta::Editor);
+}
+
+# file baton members: path, mode_a, mode_b, pool, fh, blob, base
+sub new {
+ my ($class, $git_svn, $switch_path) = @_;
+ my $self = SVN::Delta::Editor->new;
+ bless $self, $class;
+ if (exists $git_svn->{last_commit}) {
+ $self->{c} = $git_svn->{last_commit};
+ $self->{empty_symlinks} =
+ _mark_empty_symlinks($git_svn, $switch_path);
+ }
+
+ # some options are read globally, but can be overridden locally
+ # per [svn-remote "..."] section. Command-line options will *NOT*
+ # override options set in an [svn-remote "..."] section
+ $repo_id = $git_svn->{repo_id};
+ my $k = "svn-remote.$repo_id.ignore-paths";
+ my $v = eval { command_oneline('config', '--get', $k) };
+ $self->{ignore_regex} = $v;
+
+ $k = "svn-remote.$repo_id.preserve-empty-dirs";
+ $v = eval { command_oneline('config', '--get', '--bool', $k) };
+ if ($v && $v eq 'true') {
+ $_preserve_empty_dirs = 1;
+ $k = "svn-remote.$repo_id.placeholder-filename";
+ $v = eval { command_oneline('config', '--get', $k) };
+ $_placeholder_filename = $v;
+ }
+
+ # Load the list of placeholder files added during previous invocations.
+ $k = "svn-remote.$repo_id.added-placeholder";
+ $v = eval { command_oneline('config', '--get-all', $k) };
+ if ($_preserve_empty_dirs && $v) {
+ # command() prints errors to stderr, so we only call it if
+ # command_oneline() succeeded.
+ my @v = command('config', '--get-all', $k);
+ $added_placeholder{ dirname($_) } = $_ foreach @v;
+ }
+
+ $self->{empty} = {};
+ $self->{dir_prop} = {};
+ $self->{file_prop} = {};
+ $self->{absent_dir} = {};
+ $self->{absent_file} = {};
+ $self->{gii} = $git_svn->tmp_index_do(sub { Git::IndexInfo->new });
+ $self->{pathnameencoding} = Git::config('svn.pathnameencoding');
+ $self;
+}
+
+# this uses the Ra object, so it must be called before do_{switch,update},
+# not inside them (when the Git::SVN::Fetcher object is passed) to
+# do_{switch,update}
+sub _mark_empty_symlinks {
+ my ($git_svn, $switch_path) = @_;
+ my $bool = Git::config_bool('svn.brokenSymlinkWorkaround');
+ return {} if (!defined($bool)) || (defined($bool) && ! $bool);
+
+ my %ret;
+ my ($rev, $cmt) = $git_svn->last_rev_commit;
+ return {} unless ($rev && $cmt);
+
+ # allow the warning to be printed for each revision we fetch to
+ # ensure the user sees it. The user can also disable the workaround
+ # on the repository even while git svn is running and the next
+ # revision fetched will skip this expensive function.
+ my $printed_warning;
+ chomp(my $empty_blob = `git hash-object -t blob --stdin < /dev/null`);
+ my ($ls, $ctx) = command_output_pipe(qw/ls-tree -r -z/, $cmt);
+ local $/ = "\0";
+ my $pfx = defined($switch_path) ? $switch_path : $git_svn->{path};
+ $pfx .= '/' if length($pfx);
+ while (<$ls>) {
+ chomp;
+ s/\A100644 blob $empty_blob\t//o or next;
+ unless ($printed_warning) {
+ print STDERR "Scanning for empty symlinks, ",
+ "this may take a while if you have ",
+ "many empty files\n",
+ "You may disable this with `",
+ "git config svn.brokenSymlinkWorkaround ",
+ "false'.\n",
+ "This may be done in a different ",
+ "terminal without restarting ",
+ "git svn\n";
+ $printed_warning = 1;
+ }
+ my $path = $_;
+ my (undef, $props) =
+ $git_svn->ra->get_file($pfx.$path, $rev, undef);
+ if ($props->{'svn:special'}) {
+ $ret{$path} = 1;
+ }
+ }
+ command_close_pipe($ls, $ctx);
+ \%ret;
+}
+
+# returns true if a given path is inside a ".git" directory
+sub in_dot_git {
+ $_[0] =~ m{(?:^|/)\.git(?:/|$)};
+}
+
+# return value: 0 -- don't ignore, 1 -- ignore
+sub is_path_ignored {
+ my ($self, $path) = @_;
+ return 1 if in_dot_git($path);
+ return 1 if defined($self->{ignore_regex}) &&
+ $path =~ m!$self->{ignore_regex}!;
+ return 0 unless defined($_ignore_regex);
+ return 1 if $path =~ m!$_ignore_regex!o;
+ return 0;
+}
+
+sub set_path_strip {
+ my ($self, $path) = @_;
+ $self->{path_strip} = qr/^\Q$path\E(\/|$)/ if length $path;
+}
+
+sub open_root {
+ { path => '' };
+}
+
+sub open_directory {
+ my ($self, $path, $pb, $rev) = @_;
+ { path => $path };
+}
+
+sub git_path {
+ my ($self, $path) = @_;
+ if (my $enc = $self->{pathnameencoding}) {
+ require Encode;
+ Encode::from_to($path, 'UTF-8', $enc);
+ }
+ if ($self->{path_strip}) {
+ $path =~ s!$self->{path_strip}!! or
+ die "Failed to strip path '$path' ($self->{path_strip})\n";
+ }
+ $path;
+}
+
+sub delete_entry {
+ my ($self, $path, $rev, $pb) = @_;
+ return undef if $self->is_path_ignored($path);
+
+ my $gpath = $self->git_path($path);
+ return undef if ($gpath eq '');
+
+ # remove entire directories.
+ my ($tree) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
+ =~ /\A040000 tree ([a-f\d]{40})\t\Q$gpath\E\0/);
+ if ($tree) {
+ my ($ls, $ctx) = command_output_pipe(qw/ls-tree
+ -r --name-only -z/,
+ $tree);
+ local $/ = "\0";
+ while (<$ls>) {
+ chomp;
+ my $rmpath = "$gpath/$_";
+ $self->{gii}->remove($rmpath);
+ print "\tD\t$rmpath\n" unless $::_q;
+ }
+ print "\tD\t$gpath/\n" unless $::_q;
+ command_close_pipe($ls, $ctx);
+ } else {
+ $self->{gii}->remove($gpath);
+ print "\tD\t$gpath\n" unless $::_q;
+ }
+ # Don't add to @deleted_gpath if we're deleting a placeholder file.
+ push @deleted_gpath, $gpath unless $added_placeholder{dirname($path)};
+ $self->{empty}->{$path} = 0;
+ undef;
+}
+
+sub open_file {
+ my ($self, $path, $pb, $rev) = @_;
+ my ($mode, $blob);
+
+ goto out if $self->is_path_ignored($path);
+
+ my $gpath = $self->git_path($path);
+ ($mode, $blob) = (command('ls-tree', '-z', $self->{c}, "./$gpath")
+ =~ /\A(\d{6}) blob ([a-f\d]{40})\t\Q$gpath\E\0/);
+ unless (defined $mode && defined $blob) {
+ die "$path was not found in commit $self->{c} (r$rev)\n";
+ }
+ if ($mode eq '100644' && $self->{empty_symlinks}->{$path}) {
+ $mode = '120000';
+ }
+out:
+ { path => $path, mode_a => $mode, mode_b => $mode, blob => $blob,
+ pool => SVN::Pool->new, action => 'M' };
+}
+
+sub add_file {
+ my ($self, $path, $pb, $cp_path, $cp_rev) = @_;
+ my $mode;
+
+ if (!$self->is_path_ignored($path)) {
+ my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
+ delete $self->{empty}->{$dir};
+ $mode = '100644';
+
+ if ($added_placeholder{$dir}) {
+ # Remove our placeholder file, if we created one.
+ delete_entry($self, $added_placeholder{$dir})
+ unless $path eq $added_placeholder{$dir};
+ delete $added_placeholder{$dir}
+ }
+ }
+
+ { path => $path, mode_a => $mode, mode_b => $mode,
+ pool => SVN::Pool->new, action => 'A' };
+}
+
+sub add_directory {
+ my ($self, $path, $cp_path, $cp_rev) = @_;
+ goto out if $self->is_path_ignored($path);
+ my $gpath = $self->git_path($path);
+ if ($gpath eq '') {
+ my ($ls, $ctx) = command_output_pipe(qw/ls-tree
+ -r --name-only -z/,
+ $self->{c});
+ local $/ = "\0";
+ while (<$ls>) {
+ chomp;
+ $self->{gii}->remove($_);
+ print "\tD\t$_\n" unless $::_q;
+ push @deleted_gpath, $gpath;
+ }
+ command_close_pipe($ls, $ctx);
+ $self->{empty}->{$path} = 0;
+ }
+ my ($dir, $file) = ($path =~ m#^(.*?)/?([^/]+)$#);
+ delete $self->{empty}->{$dir};
+ $self->{empty}->{$path} = 1;
+
+ if ($added_placeholder{$dir}) {
+ # Remove our placeholder file, if we created one.
+ delete_entry($self, $added_placeholder{$dir});
+ delete $added_placeholder{$dir}
+ }
+
+out:
+ { path => $path };
+}
+
+sub change_dir_prop {
+ my ($self, $db, $prop, $value) = @_;
+ return undef if $self->is_path_ignored($db->{path});
+ $self->{dir_prop}->{$db->{path}} ||= {};
+ $self->{dir_prop}->{$db->{path}}->{$prop} = $value;
+ undef;
+}
+
+sub absent_directory {
+ my ($self, $path, $pb) = @_;
+ return undef if $self->is_path_ignored($path);
+ $self->{absent_dir}->{$pb->{path}} ||= [];
+ push @{$self->{absent_dir}->{$pb->{path}}}, $path;
+ undef;
+}
+
+sub absent_file {
+ my ($self, $path, $pb) = @_;
+ return undef if $self->is_path_ignored($path);
+ $self->{absent_file}->{$pb->{path}} ||= [];
+ push @{$self->{absent_file}->{$pb->{path}}}, $path;
+ undef;
+}
+
+sub change_file_prop {
+ my ($self, $fb, $prop, $value) = @_;
+ return undef if $self->is_path_ignored($fb->{path});
+ if ($prop eq 'svn:executable') {
+ if ($fb->{mode_b} != 120000) {
+ $fb->{mode_b} = defined $value ? 100755 : 100644;
+ }
+ } elsif ($prop eq 'svn:special') {
+ $fb->{mode_b} = defined $value ? 120000 : 100644;
+ } else {
+ $self->{file_prop}->{$fb->{path}} ||= {};
+ $self->{file_prop}->{$fb->{path}}->{$prop} = $value;
+ }
+ undef;
+}
+
+sub apply_textdelta {
+ my ($self, $fb, $exp) = @_;
+ return undef if $self->is_path_ignored($fb->{path});
+ my $fh = $::_repository->temp_acquire('svn_delta');
+ # $fh gets auto-closed() by SVN::TxDelta::apply(),
+ # (but $base does not,) so dup() it for reading in close_file
+ open my $dup, '<&', $fh or croak $!;
+ my $base = $::_repository->temp_acquire('git_blob');
+
+ if ($fb->{blob}) {
+ my ($base_is_link, $size);
+
+ if ($fb->{mode_a} eq '120000' &&
+ ! $self->{empty_symlinks}->{$fb->{path}}) {
+ print $base 'link ' or die "print $!\n";
+ $base_is_link = 1;
+ }
+ retry:
+ $size = $::_repository->cat_blob($fb->{blob}, $base);
+ die "Failed to read object $fb->{blob}" if ($size < 0);
+
+ if (defined $exp) {
+ seek $base, 0, 0 or croak $!;
+ my $got = ::md5sum($base);
+ if ($got ne $exp) {
+ my $err = "Checksum mismatch: ".
+ "$fb->{path} $fb->{blob}\n" .
+ "expected: $exp\n" .
+ " got: $got\n";
+ if ($base_is_link) {
+ warn $err,
+ "Retrying... (possibly ",
+ "a bad symlink from SVN)\n";
+ $::_repository->temp_reset($base);
+ $base_is_link = 0;
+ goto retry;
+ }
+ die $err;
+ }
+ }
+ }
+ seek $base, 0, 0 or croak $!;
+ $fb->{fh} = $fh;
+ $fb->{base} = $base;
+ [ SVN::TxDelta::apply($base, $dup, undef, $fb->{path}, $fb->{pool}) ];
+}
+
+sub close_file {
+ my ($self, $fb, $exp) = @_;
+ return undef if $self->is_path_ignored($fb->{path});
+
+ my $hash;
+ my $path = $self->git_path($fb->{path});
+ if (my $fh = $fb->{fh}) {
+ if (defined $exp) {
+ seek($fh, 0, 0) or croak $!;
+ my $got = ::md5sum($fh);
+ if ($got ne $exp) {
+ die "Checksum mismatch: $path\n",
+ "expected: $exp\n got: $got\n";
+ }
+ }
+ if ($fb->{mode_b} == 120000) {
+ sysseek($fh, 0, 0) or croak $!;
+ my $rd = sysread($fh, my $buf, 5);
+
+ if (!defined $rd) {
+ croak "sysread: $!\n";
+ } elsif ($rd == 0) {
+ warn "$path has mode 120000",
+ " but it points to nothing\n",
+ "converting to an empty file with mode",
+ " 100644\n";
+ $fb->{mode_b} = '100644';
+ } elsif ($buf ne 'link ') {
+ warn "$path has mode 120000",
+ " but is not a link\n";
+ } else {
+ my $tmp_fh = $::_repository->temp_acquire(
+ 'svn_hash');
+ my $res;
+ while ($res = sysread($fh, my $str, 1024)) {
+ my $out = syswrite($tmp_fh, $str, $res);
+ defined($out) && $out == $res
+ or croak("write ",
+ Git::temp_path($tmp_fh),
+ ": $!\n");
+ }
+ defined $res or croak $!;
+
+ ($fh, $tmp_fh) = ($tmp_fh, $fh);
+ Git::temp_release($tmp_fh, 1);
+ }
+ }
+
+ $hash = $::_repository->hash_and_insert_object(
+ Git::temp_path($fh));
+ $hash =~ /^[a-f\d]{40}$/ or die "not a sha1: $hash\n";
+
+ Git::temp_release($fb->{base}, 1);
+ Git::temp_release($fh, 1);
+ } else {
+ $hash = $fb->{blob} or die "no blob information\n";
+ }
+ $fb->{pool}->clear;
+ $self->{gii}->update($fb->{mode_b}, $hash, $path) or croak $!;
+ print "\t$fb->{action}\t$path\n" if $fb->{action} && ! $::_q;
+ undef;
+}
+
+sub abort_edit {
+ my $self = shift;
+ $self->{nr} = $self->{gii}->{nr};
+ delete $self->{gii};
+ $self->SUPER::abort_edit(@_);
+}
+
+sub close_edit {
+ my $self = shift;
+
+ if ($_preserve_empty_dirs) {
+ my @empty_dirs;
+
+ # Any entry flagged as empty that also has an associated
+ # dir_prop represents a newly created empty directory.
+ foreach my $i (keys %{$self->{empty}}) {
+ push @empty_dirs, $i if exists $self->{dir_prop}->{$i};
+ }
+
+ # Search for directories that have become empty due subsequent
+ # file deletes.
+ push @empty_dirs, $self->find_empty_directories();
+
+ # Finally, add a placeholder file to each empty directory.
+ $self->add_placeholder_file($_) foreach (@empty_dirs);
+
+ $self->stash_placeholder_list();
+ }
+
+ $self->{git_commit_ok} = 1;
+ $self->{nr} = $self->{gii}->{nr};
+ delete $self->{gii};
+ $self->SUPER::close_edit(@_);
+}
+
+sub find_empty_directories {
+ my ($self) = @_;
+ my @empty_dirs;
+ my %dirs = map { dirname($_) => 1 } @deleted_gpath;
+
+ foreach my $dir (sort keys %dirs) {
+ next if $dir eq ".";
+
+ # If there have been any additions to this directory, there is
+ # no reason to check if it is empty.
+ my $skip_added = 0;
+ foreach my $t (qw/dir_prop file_prop/) {
+ foreach my $path (keys %{ $self->{$t} }) {
+ if (exists $self->{$t}->{dirname($path)}) {
+ $skip_added = 1;
+ last;
+ }
+ }
+ last if $skip_added;
+ }
+ next if $skip_added;
+
+ # Use `git ls-tree` to get the filenames of this directory
+ # that existed prior to this particular commit.
+ my $ls = command('ls-tree', '-z', '--name-only',
+ $self->{c}, "$dir/");
+ my %files = map { $_ => 1 } split(/\0/, $ls);
+
+ # Remove the filenames that were deleted during this commit.
+ delete $files{$_} foreach (@deleted_gpath);
+
+ # Report the directory if there are no filenames left.
+ push @empty_dirs, $dir unless (scalar %files);
+ }
+ @empty_dirs;
+}
+
+sub add_placeholder_file {
+ my ($self, $dir) = @_;
+ my $path = "$dir/$_placeholder_filename";
+ my $gpath = $self->git_path($path);
+
+ my $fh = $::_repository->temp_acquire($gpath);
+ my $hash = $::_repository->hash_and_insert_object(Git::temp_path($fh));
+ Git::temp_release($fh, 1);
+ $self->{gii}->update('100644', $hash, $gpath) or croak $!;
+
+ # The directory should no longer be considered empty.
+ delete $self->{empty}->{$dir} if exists $self->{empty}->{$dir};
+
+ # Keep track of any placeholder files we create.
+ $added_placeholder{$dir} = $path;
+}
+
+sub stash_placeholder_list {
+ my ($self) = @_;
+ my $k = "svn-remote.$repo_id.added-placeholder";
+ my $v = eval { command_oneline('config', '--get-all', $k) };
+ command_noisy('config', '--unset-all', $k) if $v;
+ foreach (values %added_placeholder) {
+ command_noisy('config', '--add', $k, $_);
+ }
+}
+
+1;
+__END__
+
+Git::SVN::Fetcher - tree delta consumer for "git svn fetch"
+
+=head1 SYNOPSIS
+
+ use SVN::Core;
+ use SVN::Ra;
+ use Git::SVN;
+ use Git::SVN::Fetcher;
+ use Git;
+
+ my $gs = Git::SVN->find_by_url($url);
+ my $ra = SVN::Ra->new(url => $url);
+ my $editor = Git::SVN::Fetcher->new($gs);
+ my $reporter = $ra->do_update($SVN::Core::INVALID_REVNUM, '',
+ 1, $editor);
+ $reporter->set_path('', $old_rev, 0);
+ $reporter->finish_report;
+ my $tree = $gs->tmp_index_do(sub { command_oneline('write-tree') });
+
+ foreach my $path (keys %{$editor->{dir_prop}}) {
+ my $props = $editor->{dir_prop}{$path};
+ foreach my $prop (keys %$props) {
+ print "property $prop at $path changed to $props->{$prop}\n";
+ }
+ }
+ foreach my $path (keys %{$editor->{empty}}) {
+ my $action = $editor->{empty}{$path} ? 'added' : 'removed';
+ print "empty directory $path $action\n";
+ }
+ foreach my $path (keys %{$editor->{file_prop}}) { ... }
+ foreach my $parent (keys %{$editor->{absent_dir}}) {
+ my @children = @{$editor->{absent_dir}{$parent}};
+ print "cannot fetch directory $parent/$_: not authorized?\n"
+ foreach @children;
+ }
+ foreach my $parent (keys %{$editor->{absent_file}}) { ... }
+
+=head1 DESCRIPTION
+
+This is a subclass of C<SVN::Delta::Editor>, which means it implements
+callbacks to act as a consumer of Subversion tree deltas. This
+particular implementation of those callbacks is meant to store
+information about the resulting content which B<git svn fetch> could
+use to populate new commits and new entries for F<unhandled.log>.
+More specifically:
+
+=over
+
+=item * Additions, removals, and modifications of files are propagated
+to git-svn's index file F<$GIT_DIR/svn/$refname/index> using
+B<git update-index>.
+
+=item * Changes in Subversion path properties are recorded in the
+C<dir_prop> and C<file_prop> fields (which are hashes).
+
+=item * Addition and removal of empty directories are indicated by
+entries with value 1 and 0 respectively in the C<empty> hash.
+
+=item * Paths that are present but cannot be conveyed (presumably due
+to permissions) are recorded in the C<absent_file> and
+C<absent_dir> hashes. For each key, the corresponding value is
+a list of paths under that directory that were present but
+could not be conveyed.
+
+=back
+
+The interface is unstable. Do not use this module unless you are
+developing git-svn.
+
+=head1 DEPENDENCIES
+
+L<SVN::Delta> from the Subversion perl bindings,
+the core L<Carp>, L<File::Basename>, and L<IO::File> modules,
+and git's L<Git> helper module.
+
+C<Git::SVN::Fetcher> has not been tested using callers other than
+B<git-svn> itself.
+
+=head1 SEE ALSO
+
+L<SVN::Delta>.
+
+=head1 INCOMPATIBILITIES
+
+None reported.
+
+=head1 BUGS
+
+None.
diff --git a/perl/Makefile.PL b/perl/Makefile.PL
index 4d8e31d25f..424890a1a4 100644
--- a/perl/Makefile.PL
+++ b/perl/Makefile.PL
@@ -27,6 +27,7 @@ MAKE_FRAG
my %pm = (
'Git.pm' => '$(INST_LIBDIR)/Git.pm',
'Git/I18N.pm' => '$(INST_LIBDIR)/Git/I18N.pm',
+ 'Git/SVN/Fetcher.pm' => '$(INST_LIBDIR)/Git/SVN/Fetcher.pm',
'Git/SVN/Prompt.pm' => '$(INST_LIBDIR)/Git/SVN/Prompt.pm',
);