#!/usr/bin/env perl # Copyright (C) 2006, Eric Wong # License: GPL v2 or later use warnings; use strict; use vars qw/ $AUTHOR $VERSION $sha1 $sha1_short $_revision $_q $_authors %users/; $AUTHOR = 'Eric Wong '; $VERSION = '@@GIT_VERSION@@'; # From which subdir have we been invoked? my $cmd_dir_prefix = eval { command_oneline([qw/rev-parse --show-prefix/], STDERR => 0) } || ''; my $git_dir_user_set = 1 if defined $ENV{GIT_DIR}; $ENV{GIT_DIR} ||= '.git'; $Git::SVN::default_repo_id = 'svn'; $Git::SVN::default_ref_id = $ENV{GIT_SVN_ID} || 'git-svn'; $Git::SVN::Ra::_log_window_size = 100; $Git::SVN::Log::TZ = $ENV{TZ}; $ENV{TZ} = 'UTC'; $| = 1; # unbuffer STDOUT sub fatal (@) { print STDERR "@_\n"; exit 1 } require SVN::Core; # use()-ing this causes segfaults for me... *shrug* require SVN::Ra; require SVN::Delta; if ($SVN::Core::VERSION lt '1.1.0') { fatal "Need SVN::Core 1.1.0 or better (got $SVN::Core::VERSION)"; } push @Git::SVN::Ra::ISA, 'SVN::Ra'; push @SVN::Git::Editor::ISA, 'SVN::Delta::Editor'; push @SVN::Git::Fetcher::ISA, 'SVN::Delta::Editor'; use Carp qw/croak/; use IO::File qw//; use File::Basename qw/dirname basename/; use File::Path qw/mkpath/; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; use IPC::Open3; use Git; BEGIN { # import functions from Git into our packages, en masse no strict 'refs'; foreach (qw/command command_oneline command_noisy command_output_pipe command_input_pipe command_close_pipe/) { for my $package ( qw(SVN::Git::Editor SVN::Git::Fetcher Git::SVN::Migration Git::SVN::Log Git::SVN), __PACKAGE__) { *{"${package}::$_"} = \&{"Git::$_"}; } } } my ($SVN); $sha1 = qr/[a-f\d]{40}/; $sha1_short = qr/[a-f\d]{4,40}/; my ($_stdin, $_help, $_edit, $_message, $_file, $_template, $_shared, $_version, $_fetch_all, $_no_rebase, $_merge, $_strategy, $_dry_run, $_local, $_prefix, $_no_checkout, $_verbose); $Git::SVN::_follow_parent = 1; my %remote_opts = ( 'username=s' => \$Git::SVN::Prompt::_username, 'config-dir=s' => \$Git::SVN::Ra::config_dir, 'no-auth-cache' => \$Git::SVN::Prompt::_no_auth_cache ); my %fc_opts = ( 'follow-parent|follow!' => \$Git::SVN::_follow_parent, 'authors-file|A=s' => \$_authors, 'repack:i' => \$Git::SVN::_repack, 'noMetadata' => \$Git::SVN::_no_metadata, 'useSvmProps' => \$Git::SVN::_use_svm_props, 'useSvnsyncProps' => \$Git::SVN::_use_svnsync_props, 'log-window-size=i' => \$Git::SVN::Ra::_log_window_size, 'no-checkout' => \$_no_checkout, 'quiet|q' => \$_q, 'repack-flags|repack-args|repack-opts=s' => \$Git::SVN::_repack_flags, %remote_opts ); my ($_trunk, $_tags, $_branches, $_stdlayout); my %icv; my %init_opts = ( 'template=s' => \$_template, 'shared:s' => \$_shared, 'trunk|T=s' => \$_trunk, 'tags|t=s' => \$_tags, 'branches|b=s' => \$_branches, 'prefix=s' => \$_prefix, 'stdlayout|s' => \$_stdlayout, 'minimize-url|m' => \$Git::SVN::_minimize_url, 'no-metadata' => sub { $icv{noMetadata} = 1 }, 'use-svm-props' => sub { $icv{useSvmProps} = 1 }, 'use-svnsync-props' => sub { $icv{useSvnsyncProps} = 1 }, 'rewrite-root=s' => sub { $icv{rewriteRoot} = $_[1] }, %remote_opts ); my %cmt_opts = ( 'edit|e' => \$_edit, 'rmdir' => \$SVN::Git::Editor::_rmdir, 'find-copies-harder' => \$SVN::Git::Editor::_find_copies_harder, 'l=i' => \$SVN::Git::Editor::_rename_limit, 'copy-similarity|C=i'=> \$SVN::Git::Editor::_cp_similarity ); my %cmd = ( fetch => [ \&cmd_fetch, "Download new revisions from SVN", { 'revision|r=s' => \$_revision, 'fetch-all|all' => \$_fetch_all, %fc_opts } ], clone => [ \&cmd_clone, "Initialize and fetch revisions", { 'revision|r=s' => \$_revision, %fc_opts, %init_opts } ], init => [ \&cmd_init, "Initialize a repo for tracking" . " (requires URL argument)", \%init_opts ], 'multi-init' => [ \&cmd_multi_init, "Deprecated alias for ". "'$0 init -T -b -t'", \%init_opts ], dcommit => [ \&cmd_dcommit, 'Commit several diffs to merge with upstream', { 'merge|m|M' => \$_merge, 'strategy|s=s' => \$_strategy, 'verbose|v' => \$_verbose, 'dry-run|n' => \$_dry_run, 'fetch-all|all' => \$_fetch_all, 'no-rebase' => \$_no_rebase, %cmt_opts, %fc_opts } ], 'set-tree' => [ \&cmd_set_tree, "Set an SVN repository to a git tree-ish", { 'stdin|' => \$_stdin, %cmt_opts, %fc_opts, } ], 'create-ignore' => [ \&cmd_create_ignore, 'Create a .gitignore per svn:ignore', { 'revision|r=i' => \$_revision } ], 'propget' => [ \&cmd_propget, 'Print the value of a property on a file or directory', { 'revision|r=i' => \$_revision } ], 'proplist' => [ \&cmd_proplist, 'List all properties of a file or directory', { 'revision|r=i' => \$_revision } ], 'show-ignore' => [ \&cmd_show_ignore, "Show svn:ignore listings", { 'revision|r=i' => \$_revision } ], 'multi-fetch' => [ \&cmd_multi_fetch, "Deprecated alias for $0 fetch --all", { 'revision|r=s' => \$_revision, %fc_opts } ], 'migrate' => [ sub { }, # no-op, we automatically run this anyways, 'Migrate configuration/metadata/layout from previous versions of git-svn', { 'minimize' => \$Git::SVN::Migration::_minimize, %remote_opts } ], 'log' => [ \&Git::SVN::Log::cmd_show_log, 'Show commit logs', { 'limit=i' => \$Git::SVN::Log::limit, 'revision|r=s' => \$_revision, 'verbose|v' => \$Git::SVN::Log::verbose, 'incremental' => \$Git::SVN::Log::incremental, 'oneline' => \$Git::SVN::Log::oneline, 'show-commit' => \$Git::SVN::Log::show_commit, 'non-recursive' => \$Git::SVN::Log::non_recursive, 'authors-file|A=s' => \$_authors, 'color' => \$Git::SVN::Log::color, 'pager=s' => \$Git::SVN::Log::pager } ], 'find-rev' => [ \&cmd_find_rev, "Translate between SVN revision numbers and tree-ish", {} ], 'rebase' => [ \&cmd_rebase, "Fetch and rebase your working directory", { 'merge|m|M' => \$_merge, 'verbose|v' => \$_verbose, 'strategy|s=s' => \$_strategy, 'local|l' => \$_local, 'fetch-all|all' => \$_fetch_all, %fc_opts } ], 'commit-diff' => [ \&cmd_commit_diff, 'Commit a diff between two trees', { 'message|m=s' => \$_message, 'file|F=s' => \$_file, 'revision|r=s' => \$_revision, %cmt_opts } ], ); my $cmd; for (my $i = 0; $i < @ARGV; $i++) { if (defined $cmd{$ARGV[$i]}) { $cmd = $ARGV[$i]; splice @ARGV, $i, 1; last; } }; my %opts = %{$cmd{$cmd}->[2]} if (defined $cmd); read_repo_config(\%opts); Getopt::Long::Configure('pass_through') if ($cmd && $cmd eq 'log'); my $rv = GetOptions(%opts, 'help|H|h' => \$_help, 'version|V' => \$_version, 'minimize-connections' => \$Git::SVN::Migration::_minimize, 'id|i=s' => \$Git::SVN::default_ref_id, 'svn-remote|remote|R=s' => sub { $Git::SVN::no_reuse_existing = 1; $Git::SVN::default_repo_id = $_[1] }); exit 1 if (!$rv && $cmd && $cmd ne 'log'); usage(0) if $_help; version() if $_version; usage(1) unless defined $cmd; load_authors() if $_authors; # make sure we're always running unless ($cmd =~ /(?:clone|init|multi-init)$/) { unless (-d $ENV{GIT_DIR}) { if ($git_dir_user_set) { die "GIT_DIR=$ENV{GIT_DIR} explicitly set, ", "but it is not a directory\n"; } my $git_dir = delete $ENV{GIT_DIR}; chomp(my $cdup = command_oneline(qw/rev-parse --show-cdup/)); unless (length $cdup) { die "Already at toplevel, but $git_dir ", "not found '$cdup'\n"; } chdir $cdup or die "Unable to chdir up to '$cdup'\n"; unless (-d $git_dir) { die "$git_dir still not found after going to ", "'$cdup'\n"; } $ENV{GIT_DIR} = $git_dir; } } unless ($cmd =~ /^(?:clone|init|multi-init|commit-diff)$/) { Git::SVN::Migration::migration_check(); } Git::SVN::init_vars(); eval { Git::SVN::verify_remotes_sanity(); $cmd{$cmd}->[0]->(@ARGV); }; fatal $@ if $@; post_fetch_checkout(); exit 0; ####################### primary functions ###################### sub usage { my $exit = shift || 0; my $fd = $exit ? \*STDERR : \*STDOUT; print $fd <<""; git-svn - bidirectional operations between a single Subversion tree and git Usage: $0 [options] [arguments]\n print $fd "Available commands:\n" unless $cmd; foreach (sort keys %cmd) { next if $cmd && $cmd ne $_; next if /^multi-/; # don't show deprecated commands print $fd ' ',pack('A17',$_),$cmd{$_}->[1],"\n"; foreach (keys %{$cmd{$_}->[2]}) { # mixed-case options are for .git/config only next if /[A-Z]/ && /^[a-z]+$/i; # prints out arguments as they should be passed: my $x = s#[:=]s$## ? '' : s#[:=]i$## ? '' : ''; print $fd ' ' x 21, join(', ', map { length $_ > 1 ? "--$_" : "-$_" } split /\|/,$_)," $x\n"; } } print $fd <<""; \nGIT_SVN_ID may be set in the environment or via the --id/-i switch to an arbitrary identifier if you're tracking multiple SVN branches/repositories in one git repository and want to keep them separate. See git-svn(1) for more information. exit $exit; } sub version { print "git-svn version $VERSION (svn $SVN::Core::VERSION)\n"; exit 0; } sub do_git_init_db { unless (-d $ENV{GIT_DIR}) { my @init_db = ('init'); push @init_db, "--template=$_template" if defined $_template; if (defined $_shared) { if ($_shared =~ /[a-z]/) { push @init_db, "--shared=$_shared"; } else { push @init_db, "--shared"; } } command_noisy(@init_db); } my $set; my $pfx = "svn-remote.$Git::SVN::default_repo_id"; foreach my $i (keys %icv) { die "'$set' and '$i' cannot both be set\n" if $set; next unless defined $icv{$i}; command_noisy('config', "$pfx.$i", $icv{$i}); $set = $i; } } sub init_subdir { my $repo_path = shift or return; mkpath([$repo_path]) unless -d $repo_path; chdir $repo_path or die "Couldn't chdir to $repo_path: $!\n"; $ENV{GIT_DIR} = '.git'; } sub cmd_clone { my ($url, $path) = @_; if (!defined $path && (defined $_trunk || defined $_branches || defined $_tags || defined $_stdlayout) && $url !~ m#^[a-z\+]+://#) { $path = $url; } $path = basename($url) if !defined $path || !length $path; cmd_init($url, $path); Git::SVN::fetch_all($Git::SVN::default_repo_id); } sub cmd_init { if (defined $_stdlayout) { $_trunk = 'trunk' if (!defined $_trunk); $_tags = 'tags' if (!defined $_tags); $_branches = 'branches' if (!defined $_branches); } if (defined $_trunk || defined $_branches || defined $_tags) { return cmd_multi_init(@_); } my $url = shift or die "SVN repository location required ", "as a command-line argument\n"; init_subdir(@_); do_git_init_db(); Git::SVN->init($url); } sub cmd_fetch { if (grep /^\d+=./, @_) { die "'=' fetch arguments are ", "no longer supported.\n"; } my ($remote) = @_; if (@_ > 1) { die "Usage: $0 fetch [--all] [svn-remote]\n"; } $remote ||= $Git::SVN::default_repo_id; if ($_fetch_all) { cmd_multi_fetch(); } else { Git::SVN::fetch_all($remote, Git::SVN::read_all_remotes()); } } sub cmd_set_tree { my (@commits) = @_; if ($_stdin || !@commits) { print "Reading from stdin...\n"; @commits = (); while () { if (/\b($sha1_short)\b/o) { unshift @commits, $1; } } } my @revs; foreach my $c (@commits) { my @tmp = command('rev-parse',$c); if (scalar @tmp == 1) { push @revs, $tmp[0]; } elsif (scalar @tmp > 1) { push @revs, reverse(command('rev-list',@tmp)); } else { fatal "Failed to rev-parse $c"; } } my $gs = Git::SVN->new; my ($r_last, $cmt_last) = $gs->last_rev_commit; $gs->fetch; if (defined $gs->{last_rev} && $r_last != $gs->{last_rev}) { fatal "There are new revisions that were fetched ", "and need to be merged (or acknowledged) ", "before committing.\nlast rev: $r_last\n", " current: $gs->{last_rev}"; } $gs->set_tree($_) foreach @revs; print "Done committing ",scalar @revs," revisions to SVN\n"; } sub cmd_dcommit { my $head = shift; $head ||= 'HEAD'; my @refs; my ($url, $rev, $uuid, $gs) = working_head_info($head, \@refs); print "Committing to $url ...\n"; unless ($gs) { die "Unable to determine upstream SVN information from ", "$head history\n"; } my $last_rev; my ($linear_refs, $parents) = linearize_history($gs, \@refs); if ($_no_rebase && scalar(@$linear_refs) > 1) { warn "Attempting to commit more than one change while ", "--no-rebase is enabled.\n", "If these changes depend on each other, re-running ", "without --no-rebase will be required." } foreach my $d (@$linear_refs) { unless (defined $last_rev) { (undef, $last_rev, undef) = cmt_metadata("$d~1"); unless (defined $last_rev) { fatal "Unable to extract revision information ", "from commit $d~1"; } } if ($_dry_run) { print "diff-tree $d~1 $d\n"; } else { my $cmt_rev; my %ed_opts = ( r => $last_rev, log => get_commit_entry($d)->{log}, ra => Git::SVN::Ra->new($gs->full_url), tree_a => "$d~1", tree_b => $d, editor_cb => sub { print "Committed r$_[0]\n"; $cmt_rev = $_[0]; }, svn_path => ''); if (!SVN::Git::Editor->new(\%ed_opts)->apply_diff) { print "No changes\n$d~1 == $d\n"; } elsif ($parents->{$d} && @{$parents->{$d}}) { $gs->{inject_parents_dcommit}->{$cmt_rev} = $parents->{$d}; } $_fetch_all ? $gs->fetch_all : $gs->fetch; next if $_no_rebase; # we always want to rebase against the current HEAD, # not any head that was passed to us my @diff = command('diff-tree', 'HEAD', $gs->refname, '--'); my @finish; if (@diff) { @finish = rebase_cmd(); print STDERR "W: HEAD and ", $gs->refname, " differ, using @finish:\n", "@diff"; } else { print "No changes between current HEAD and ", $gs->refname, "\nResetting to the latest ", $gs->refname, "\n"; @finish = qw/reset --mixed/; } command_noisy(@finish, $gs->refname); $last_rev = $cmt_rev; } } } sub cmd_find_rev { my $revision_or_hash = shift; my $result; if ($revision_or_hash =~ /^r\d+$/) { my $head = shift; $head ||= 'HEAD'; my @refs; my (undef, undef, undef, $gs) = working_head_info($head, \@refs); unless ($gs) { die "Unable to determine upstream SVN information from ", "$head history\n"; } my $desired_revision = substr($revision_or_hash, 1); $result = $gs->rev_db_get($desired_revision); } else { my (undef, $rev, undef) = cmt_metadata($revision_or_hash); $result = $rev; } print "$result\n" if $result; } sub cmd_rebase { command_noisy(qw/update-index --refresh/); my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); unless ($gs) { die "Unable to determine upstream SVN information from ", "working tree history\n"; } if (command(qw/diff-index HEAD --/)) { print STDERR "Cannot rebase with uncommited changes:\n"; command_noisy('status'); exit 1; } unless ($_local) { $_fetch_all ? $gs->fetch_all : $gs->fetch; } command_noisy(rebase_cmd(), $gs->refname); } sub cmd_show_ignore { my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); $gs ||= Git::SVN->new; my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum); $gs->prop_walk($gs->{path}, $r, sub { my ($gs, $path, $props) = @_; print STDOUT "\n# $path\n"; my $s = $props->{'svn:ignore'} or return; $s =~ s/[\r\n]+/\n/g; chomp $s; $s =~ s#^#$path#gm; print STDOUT "$s\n"; }); } sub cmd_create_ignore { my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); $gs ||= Git::SVN->new; my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum); $gs->prop_walk($gs->{path}, $r, sub { my ($gs, $path, $props) = @_; # $path is of the form /path/to/dir/ my $ignore = '.' . $path . '.gitignore'; my $s = $props->{'svn:ignore'} or return; open(GITIGNORE, '>', $ignore) or fatal("Failed to open `$ignore' for writing: $!"); $s =~ s/[\r\n]+/\n/g; chomp $s; # Prefix all patterns so that the ignore doesn't apply # to sub-directories. $s =~ s#^#/#gm; print GITIGNORE "$s\n"; close(GITIGNORE) or fatal("Failed to close `$ignore': $!"); command_noisy('add', $ignore); }); } # get_svnprops(PATH) # ------------------ # Helper for cmd_propget and cmd_proplist below. sub get_svnprops { my $path = shift; my ($url, $rev, $uuid, $gs) = working_head_info('HEAD'); $gs ||= Git::SVN->new; # prefix THE PATH by the sub-directory from which the user # invoked us. $path = $cmd_dir_prefix . $path; fatal("No such file or directory: $path") unless -e $path; my $is_dir = -d $path ? 1 : 0; $path = $gs->{path} . '/' . $path; # canonicalize the path (otherwise libsvn will abort or fail to # find the file) # File::Spec->canonpath doesn't collapse x/../y into y (for a # good reason), so let's do this manually. $path =~ s#/+#/#g; $path =~ s#/\.(?:/|$)#/#g; $path =~ s#/[^/]+/\.\.##g; $path =~ s#/$##g; my $r = (defined $_revision ? $_revision : $gs->ra->get_latest_revnum); my $props; if ($is_dir) { (undef, undef, $props) = $gs->ra->get_dir($path, $r); } else { (undef, $props) = $gs->ra->get_file($path, $r, undef); } return $props; } # cmd_propget (PROP, PATH) # ------------------------ # Print the SVN property PROP for PATH. sub cmd_propget { my ($prop, $path) = @_; $path = '.' if not defined $path; usage(1) if not defined $prop; my $props = get_svnprops($path); if (not defined $props->{$prop}) { fatal("`$path' does not have a `$prop' SVN property."); } print $props->{$prop} . "\n"; } # cmd_proplist (PATH) # ------------------- # Print the list of SVN properties for PATH. sub cmd_proplist { my $path = shift; $path = '.' if not defined $path; my $props = get_svnprops($path); print "Properties on '$path':\n"; foreach (sort keys %{$props}) { print " $_\n"; } } sub cmd_multi_init { my $url = shift; unless (defined $_trunk || defined $_branches || defined $_tags) { usage(1); } # there are currently some bugs that prevent multi-init/multi-fetch # setups from working well without this. $Git::SVN::_minimize_url = 1; $_prefix = '' unless defined $_prefix; if (defined $url) { $url =~ s#/+$##; init_subdir(@_); } do_git_init_db(); if (defined $_trunk) { my $trunk_ref = $_prefix . 'trunk'; # try both old-style and new-style lookups: my $gs_trunk = eval { Git::SVN->new($trunk_ref) }; unless ($gs_trunk) { my ($trunk_url, $trunk_path) = complete_svn_url($url, $_trunk); $gs_trunk = Git::SVN->init($trunk_url, $trunk_path, undef, $trunk_ref); } } return unless defined $_branches || defined $_tags; my $ra = $url ? Git::SVN::Ra->new($url) : undef; complete_url_ls_init($ra, $_branches, '--branches/-b', $_prefix); complete_url_ls_init($ra, $_tags, '--tags/-t', $_prefix . 'tags/'); } sub cmd_multi_fetch { my $remotes = Git::SVN::read_all_remotes(); foreach my $repo_id (sort keys %$remotes) { if ($remotes->{$repo_id}->{url}) { Git::SVN::fetch_all($repo_id, $remotes); } } } # this command is special because it requires no metadata sub cmd_commit_diff { my ($ta, $tb, $url) = @_; my $usage = "Usage: $0 commit-diff -r ". " []"; fatal($usage) if (!defined $ta || !defined $tb); my $svn_path; if (!defined $url) { my $gs = eval { Git::SVN->new }; if (!$gs) { fatal("Needed URL or usable git-svn --id in ", "the command-line\n", $usage); } $url = $gs->{url}; $svn_path = $gs->{path}; } unless (defined $_revision) { fatal("-r|--revision is a required argument\n", $usage); } if (defined $_message && defined $_file) { fatal("Both --message/-m and --file/-F specified ", "for the commit message.\n", "I have no idea what you mean"); } if (defined $_file) { $_message = file_to_s($_file); } else { $_message ||= get_commit_entry($tb)->{log}; } my $ra ||= Git::SVN::Ra->new($url); $svn_path ||= $ra->{svn_path}; my $r = $_revision; if ($r eq 'HEAD') { $r = $ra->get_latest_revnum; } elsif ($r !~ /^\d+$/) { die "revision argument: $r not understood by git-svn\n"; } my %ed_opts = ( r => $r, log => $_message, ra => $ra, tree_a => $ta, tree_b => $tb, editor_cb => sub { print "Committed r$_[0]\n" }, svn_path => $svn_path ); if (!SVN::Git::Editor->new(\%ed_opts)->apply_diff) { print "No changes\n$ta == $tb\n"; } } ########################### utility functions ######################### sub rebase_cmd { my @cmd = qw/rebase/; push @cmd, '-v' if $_verbose; push @cmd, qw/--merge/ if $_merge; push @cmd, "--strategy=$_strategy" if $_strategy; @cmd; } sub post_fetch_checkout { return if $_no_checkout; my $gs = $Git::SVN::_head or return; return if verify_ref('refs/heads/master^0'); my $valid_head = verify_ref('HEAD^0'); command_noisy(qw(update-ref refs/heads/master), $gs->refname); return if ($valid_head || !verify_ref('HEAD^0')); return if $ENV{GIT_DIR} !~ m#^(?:.*/)?\.git$#; my $index = $ENV{GIT_INDEX_FILE} || "$ENV{GIT_DIR}/index"; return if -f $index; return if command_oneline(qw/rev-parse --is-inside-work-tree/) eq 'false'; return if command_oneline(qw/rev-parse --is-inside-git-dir/) eq 'true'; command_noisy(qw/read-tree -m -u -v HEAD HEAD/); print STDERR "Checked out HEAD:\n ", $gs->full_url, " r", $gs->last_rev, "\n"; } sub complete_svn_url { my ($url, $path) = @_; $path =~ s#/+$##; if ($path !~ m#^[a-z\+]+://#) { if (!defined $url || $url !~ m#^[a-z\+]+://#) { fatal("E: '$path' is not a complete URL ", "and a separate URL is not specified"); } return ($url, $path); } return ($path, ''); } sub complete_url_ls_init { my ($ra, $repo_path, $switch, $pfx) = @_; unless ($repo_path) { print STDERR "W: $switch not specified\n"; return; } $repo_path =~ s#/+$##; if ($repo_path =~ m#^[a-z\+]+://#) { $ra = Git::SVN::Ra->new($repo_path); $repo_path = ''; } else { $repo_path =~ s#^/+##; unless ($ra) { fatal("E: '$repo_path' is not a complete URL ", "and a separate URL is not specified"); } } my $url = $ra->{url}; my $gs = Git::SVN->init($url, undef, undef, undef, 1); my $k = "svn-remote.$gs->{repo_id}.url"; my $orig_url = eval { command_oneline(qw/config --get/, $k) }; if ($orig_url && ($orig_url ne $gs->{url})) { die "$k already set: $orig_url\n", "wanted to set to: $gs->{url}\n"; } command_oneline('config', $k, $gs->{url}) unless $orig_url; my $remote_path = "$ra->{svn_path}/$repo_path/*"; $remote_path =~ s#/+#/#g; $remote_path =~ s#^/##g; my ($n) = ($switch =~ /^--(\w+)/); if (length $pfx && $pfx !~ m#/$#) { die "--prefix='$pfx' must have a trailing slash '/'\n"; } command_noisy('config', "svn-remote.$gs->{repo_id}.$n", "$remote_path:refs/remotes/$pfx*"); } sub verify_ref { my ($ref) = @_; eval { command_oneline([ 'rev-parse', '--verify', $ref ], { STDERR => 0 }); }; } sub get_tree_from_treeish { my ($treeish) = @_; # $treeish can be a symbolic ref, too: my $type = command_oneline(qw/cat-file -t/, $treeish); my $expected; while ($type eq 'tag') { ($treeish, $type) = command(qw/cat-file tag/, $treeish); } if ($type eq 'commit') { $expected = (grep /^tree /, command(qw/cat-file commit/, $treeish))[0]; ($expected) = ($expected =~ /^tree ($sha1)$/o); die "Unable to get tree from $treeish\n" unless $expected; } elsif ($type eq 'tree') { $expected = $treeish; } else { die "$treeish is a $type, expected tree, tag or commit\n"; } return $expected; } sub get_commit_entry { my ($treeish) = shift; my %log_entry = ( log => '', tree => get_tree_from_treeish($treeish) ); my $commit_editmsg = "$ENV{GIT_DIR}/COMMIT_EDITMSG"; my $commit_msg = "$ENV{GIT_DIR}/COMMIT_MSG"; open my $log_fh, '>', $commit_editmsg or croak $!; my $type = command_oneline(qw/cat-file -t/, $treeish); if ($type eq 'commit' || $type eq 'tag') { my ($msg_fh, $ctx) = command_output_pipe('cat-file', $type, $treeish); my $in_msg = 0; while (<$msg_fh>) { if (!$in_msg) { $in_msg = 1 if (/^\s*$/); } elsif (/^git-svn-id: /) { # skip this for now, we regenerate the # correct one on re-fetch anyways # TODO: set *:merge properties or like... } else { print $log_fh $_ or croak $!; } } command_close_pipe($msg_fh, $ctx); } close $log_fh or croak $!; if ($_edit || ($type eq 'tree')) { my $editor = $ENV{VISUAL} || $ENV{EDITOR} || 'vi'; # TODO: strip out spaces, comments, like git-commit.sh system($editor, $commit_editmsg); } rename $commit_editmsg, $commit_msg or croak $!; open $log_fh, '<', $commit_msg or croak $!; { local $/; chomp($log_entry{log} = <$log_fh>); } close $log_fh or croak $!; unlink $commit_msg; \%log_entry; } sub s_to_file { my ($str, $file, $mode) = @_; open my $fd,'>',$file or croak $!; print $fd $str,"\n" or croak $!; close $fd or croak $!; chmod ($mode &~ umask, $file) if (defined $mode); } sub file_to_s { my $file = shift; open my $fd,'<',$file or croak "$!: file: $file\n"; local $/; my $ret = <$fd>; close $fd or croak $!; $ret =~ s/\s*$//s; return $ret; } # ' = real-name ' mapping based on git-svnimport: sub load_authors { open my $authors, '<', $_authors or die "Can't open $_authors $!\n"; my $log = $cmd eq 'log'; while (<$authors>) { chomp; next unless /^(.+?|\(no author\))\s*=\s*(.+?)\s*<(.+)>\s*$/; my ($user, $name, $email) = ($1, $2, $3); if ($log) { $Git::SVN::Log::rusers{"$name <$email>"} = $user; } else { $users{$user} = [$name, $email]; } } close $authors or croak $!; } # convert GetOpt::Long specs for use by git-config sub read_repo_config { return unless -d $ENV{GIT_DIR}; my $opts = shift; my @config_only; foreach my $o (keys %$opts) { # if we have mixedCase and a long option-only, then # it's a config-only variable that we don't need for # the command-line. push @config_only, $o if ($o =~ /[A-Z]/ && $o =~ /^[a-z]+$/i); my $v = $opts->{$o}; my ($key) = ($o =~ /^([a-zA-Z\-]+)/); $key =~ s/-//g; my $arg = 'git-config'; $arg .= ' --int' if ($o =~ /[:=]i$/); $arg .= ' --bool' if ($o !~ /[:=][sfi]$/); if (ref $v eq 'ARRAY') { chomp(my @tmp = `$arg --get-all svn.$key`); @$v = @tmp if @tmp; } else { chomp(my $tmp = `$arg --get svn.$key`); if ($tmp && !($arg =~ / --bool/ && $tmp eq 'false')) { $$v = $tmp; } } } delete @$opts{@config_only} if @config_only; } sub extract_metadata { my $id = shift or return (undef, undef, undef); my ($url, $rev, $uuid) = ($id =~ /^\s*git-svn-id:\s+(.*)\@(\d+) \s([a-f\d\-]+)$/x); if (!defined $rev || !$uuid || !$url) { # some of the original repositories I made had # identifiers like this: ($rev, $uuid) = ($id =~/^\s*git-svn-id:\s(\d+)\@([a-f\d\-]+)/); } return ($url, $rev, $uuid); } sub cmt_metadata { return extract_metadata((grep(/^git-svn-id: /, command(qw/cat-file commit/, shift)))[-1]); } sub working_head_info { my ($head, $refs) = @_; my @args = ('log', '--no-color', '--first-parent'); my ($fh, $ctx) = command_output_pipe(@args, $head); my $hash; my %max; while (<$fh>) { if ( m{^commit ($::sha1)$} ) { unshift @$refs, $hash if $hash and $refs; $hash = $1; next; } next unless s{^\s*(git-svn-id:)}{$1}; my ($url, $rev, $uuid) = extract_metadata($_); if (defined $url && defined $rev) { next if $max{$url} and $max{$url} < $rev; if (my $gs = Git::SVN->find_by_url($url)) { my $c = $gs->rev_db_get($rev); if ($c && $c eq $hash) { close $fh; # break the pipe return ($url, $rev, $uuid, $gs); } else { $max{$url} ||= $gs->rev_db_max; } } } } command_close_pipe($fh, $ctx); (undef, undef, undef, undef); } sub read_commit_parents { my ($parents, $c) = @_; chomp(my $p = command_oneline(qw/rev-list --parents -1/, $c)); $p =~ s/^($c)\s*// or die "rev-list --parents -1 $c failed!\n"; @{$parents->{$c}} = split(/ /, $p); } sub linearize_history { my ($gs, $refs) = @_; my %parents; foreach my $c (@$refs) { read_commit_parents(\%parents, $c); } my @linear_refs; my %skip = (); my $last_svn_commit = $gs->last_commit; foreach my $c (reverse @$refs) { next if $c eq $last_svn_commit; last if $skip{$c}; unshift @linear_refs, $c; $skip{$c} = 1; # we only want the first parent to diff against for linear # history, we save the rest to inject when we finalize the # svn commit my $fp_a = verify_ref("$c~1"); my $fp_b = shift @{$parents{$c}} if $parents{$c}; if (!$fp_a || !$fp_b) { die "Commit $c\n", "has no parent commit, and therefore ", "nothing to diff against.\n", "You should be working from a repository ", "originally created by git-svn\n"; } if ($fp_a ne $fp_b) { die "$c~1 = $fp_a, however parsing commit $c ", "revealed that:\n$c~1 = $fp_b\nBUG!\n"; } foreach my $p (@{$parents{$c}}) { $skip{$p} = 1; } } (\@linear_refs, \%parents); } package Git::SVN; use strict; use warnings; use vars qw/$default_repo_id $default_ref_id $_no_metadata $_follow_parent $_repack $_repack_flags $_use_svm_props $_head $_use_svnsync_props $no_reuse_existing $_minimize_url/; use Carp qw/croak/; use File::Path qw/mkpath/; use File::Copy qw/copy/; use IPC::Open3; my $_repack_nr; # properties that we do not log: my %SKIP_PROP; BEGIN { %SKIP_PROP = map { $_ => 1 } qw/svn:wc:ra_dav:version-url svn:special svn:executable svn:entry:committed-rev svn:entry:last-author svn:entry:uuid svn:entry:committed-date/; # some options are read globally, but can be overridden locally # per [svn-remote "..."] section. Command-line options will *NOT* # override options set in an [svn-remote "..."] section no strict 'refs'; for my $option (qw/follow_parent no_metadata use_svm_props use_svnsync_props/) { my $key = $option; $key =~ tr/_//d; my $prop = "-$option"; *$option = sub { my ($self) = @_; return $self->{$prop} if exists $self->{$prop}; my $k = "svn-remote.$self->{repo_id}.$key"; eval { command_oneline(qw/config --get/, $k) }; if ($@) { $self->{$prop} = ${"Git::SVN::_$option"}; } else { my $v = command_oneline(qw/config --bool/,$k); $self->{$prop} = $v eq 'false' ? 0 : 1; } return $self->{$prop}; } } } my %LOCKFILES; END { unlink keys %LOCKFILES if %LOCKFILES } sub resolve_local_globs { my ($url, $fetch, $glob_spec) = @_; return unless defined $glob_spec; my $ref = $glob_spec->{ref}; my $path = $glob_spec->{path}; foreach (command(qw#for-each-ref --format=%(refname) refs/remotes#)) { next unless m#^refs/remotes/$ref->{regex}$#; my $p = $1; my $pathname = desanitize_refname($path->full_path($p)); my $refname = desanitize_refname($ref->full_path($p)); if (my $existing = $fetch->{$pathname}) { if ($existing ne $refname) { die "Refspec conflict:\n", "existing: refs/remotes/$existing\n", " globbed: refs/remotes/$refname\n"; } my $u = (::cmt_metadata("refs/remotes/$refname"))[0]; $u =~ s!^\Q$url\E(/|$)!! or die "refs/remotes/$refname: '$url' not found in '$u'\n"; if ($pathname ne $u) { warn "W: Refspec glob conflict ", "(ref: refs/remotes/$refname):\n", "expected path: $pathname\n", " real path: $u\n", "Continuing ahead with $u\n"; next; } } else { $fetch->{$pathname} = $refname; } } } sub parse_revision_argument { my ($base, $head) = @_; if (!defined $::_revision || $::_revision eq 'BASE:HEAD') { return ($base, $head); } return ($1, $2) if ($::_revision =~ /^(\d+):(\d+)$/); return ($::_revision, $::_revision) if ($::_revision =~ /^\d+$/); return ($head, $head) if ($::_revision eq 'HEAD'); return ($base, $1) if ($::_revision =~ /^BASE:(\d+)$/); return ($1, $head) if ($::_revision =~ /^(\d+):HEAD$/); die "revision argument: $::_revision not understood by git-svn\n"; } sub fetch_all { my ($repo_id, $remotes) = @_; if (ref $repo_id) { my $gs = $repo_id; $repo_id = undef; $repo_id = $gs->{repo_id}; } $remotes ||= read_all_remotes(); my $remote = $remotes->{$repo_id} or die "[svn-remote \"$repo_id\"] unknown\n"; my $fetch = $remote->{fetch}; my $url = $remote->{url} or die "svn-remote.$repo_id.url not defined\n"; my (@gs, @globs); my $ra = Git::SVN::Ra->new($url); my $uuid = $ra->get_uuid; my $head = $ra->get_latest_revnum; my $base = defined $fetch ? $head : 0; # read the max revs for wildcard expansion (branches/*, tags/*) foreach my $t (qw/branches tags/) { defined $remote->{$t} or next; push @globs, $remote->{$t}; my $max_rev = eval { tmp_config(qw/--int --get/, "svn-remote.$repo_id.${t}-maxRev") }; if (defined $max_rev && ($max_rev < $base)) { $base = $max_rev; } elsif (!defined $max_rev) { $base = 0; } } if ($fetch) { foreach my $p (sort keys %$fetch) { my $gs = Git::SVN->new($fetch->{$p}, $repo_id, $p); my $lr = $gs->rev_db_max; if (defined $lr) { $base = $lr if ($lr < $base); } push @gs, $gs; } } ($base, $head) = parse_revision_argument($base, $head); $ra->gs_fetch_loop_common($base, $head, \@gs, \@globs); } sub read_all_remotes { my $r = {}; foreach (grep { s/^svn-remote\.// } command(qw/config -l/)) { if (m!^(.+)\.fetch=\s*(.*)\s*:\s*refs/remotes/(.+)\s*$!) { my ($remote, $local_ref, $remote_ref) = ($1, $2, $3); $local_ref =~ s{^/}{}; $r->{$remote}->{fetch}->{$local_ref} = $remote_ref; } elsif (m!^(.+)\.url=\s*(.*)\s*$!) { $r->{$1}->{url} = $2; } elsif (m!^(.+)\.(branches|tags)= (.*):refs/remotes/(.+)\s*$/!x) { my ($p, $g) = ($3, $4); my $rs = $r->{$1}->{$2} = { t => $2, remote => $1, path => Git::SVN::GlobSpec->new($p), ref => Git::SVN::GlobSpec->new($g) }; if (length($rs->{ref}->{right}) != 0) { die "The '*' glob character must be the last ", "character of '$g'\n"; } } } $r; } sub init_vars { if (defined $_repack) { $_repack = 1000 if ($_repack <= 0); $_repack_nr = $_repack; $_repack_flags ||= '-d'; } } sub verify_remotes_sanity { return unless -d $ENV{GIT_DIR}; my %seen; foreach (command(qw/config -l/)) { if (m!^svn-remote\.(?:.+)\.fetch=.*:refs/remotes/(\S+)\s*$!) { if ($seen{$1}) { die "Remote ref refs/remote/$1 is tracked by", "\n \"$_\"\nand\n \"$seen{$1}\"\n", "Please resolve this ambiguity in ", "your git configuration file before ", "continuing\n"; } $seen{$1} = $_; } } } # we allow more chars than remotes2config.sh... sub sanitize_remote_name { my ($name) = @_; $name =~ tr{A-Za-z0-9:,/+-}{.}c; $name; } sub find_existing_remote { my ($url, $remotes) = @_; return undef if $no_reuse_existing; my $existing; foreach my $repo_id (keys %$remotes) { my $u = $remotes->{$repo_id}->{url} or next; next if $u ne $url; $existing = $repo_id; last; } $existing; } sub init_remote_config { my ($self, $url, $no_write) = @_; $url =~ s!/+$!!; # strip trailing slash my $r = read_all_remotes(); my $existing = find_existing_remote($url, $r); if ($existing) { unless ($no_write) { print STDERR "Using existing ", "[svn-remote \"$existing\"]\n"; } $self->{repo_id} = $existing; } elsif ($_minimize_url) { my $min_url = Git::SVN::Ra->new($url)->minimize_url; $existing = find_existing_remote($min_url, $r); if ($existing) { unless ($no_write) { print STDERR "Using existing ", "[svn-remote \"$existing\"]\n"; } $self->{repo_id} = $existing; } if ($min_url ne $url) { unless ($no_write) { print STDERR "Using higher level of URL: ", "$url => $min_url\n"; } my $old_path = $self->{path}; $self->{path} = $url; $self->{path} =~ s!^\Q$min_url\E(/|$)!!; if (length $old_path) { $self->{path} .= "/$old_path"; } $url = $min_url; } } my $orig_url; if (!$existing) { # verify that we aren't overwriting anything: $orig_url = eval { command_oneline('config', '--get', "svn-remote.$self->{repo_id}.url") }; if ($orig_url && ($orig_url ne $url)) { die "svn-remote.$self->{repo_id}.url already set: ", "$orig_url\nwanted to set to: $url\n"; } } my ($xrepo_id, $xpath) = find_ref($self->refname); if (defined $xpath) { die "svn-remote.$xrepo_id.fetch already set to track ", "$xpath:refs/remotes/", $self->refname, "\n"; } unless ($no_write) { command_noisy('config', "svn-remote.$self->{repo_id}.url", $url); $self->{path} =~ s{^/}{}; command_noisy('config', '--add', "svn-remote.$self->{repo_id}.fetch", "$self->{path}:".$self->refname); } $self->{url} = $url; } sub find_by_url { # repos_root and, path are optional my ($class, $full_url, $repos_root, $path) = @_; return undef unless defined $full_url; remove_username($full_url); remove_username($repos_root) if defined $repos_root; my $remotes = read_all_remotes(); if (defined $full_url && defined $repos_root && !defined $path) { $path = $full_url; $path =~ s#^\Q$repos_root\E(?:/|$)##; } foreach my $repo_id (keys %$remotes) { my $u = $remotes->{$repo_id}->{url} or next; remove_username($u); next if defined $repos_root && $repos_root ne $u; my $fetch = $remotes->{$repo_id}->{fetch} || {}; foreach (qw/branches tags/) { resolve_local_globs($u, $fetch, $remotes->{$repo_id}->{$_}); } my $p = $path; unless (defined $p) { $p = $full_url; $p =~ s#^\Q$u\E(?:/|$)## or next; } foreach my $f (keys %$fetch) { next if $f ne $p; return Git::SVN->new($fetch->{$f}, $repo_id, $f); } } undef; } sub init { my ($class, $url, $path, $repo_id, $ref_id, $no_write) = @_; my $self = _new($class, $repo_id, $ref_id, $path); if (defined $url) { $self->init_remote_config($url, $no_write); } $self; } sub find_ref { my ($ref_id) = @_; foreach (command(qw/config -l/)) { next unless m!^svn-remote\.(.+)\.fetch= \s*(.*)\s*:\s*refs/remotes/(.+)\s*$!x; my ($repo_id, $path, $ref) = ($1, $2, $3); if ($ref eq $ref_id) { $path = '' if ($path =~ m#^\./?#); return ($repo_id, $path); } } (undef, undef, undef); } sub new { my ($class, $ref_id, $repo_id, $path) = @_; if (defined $ref_id && !defined $repo_id && !defined $path) { ($repo_id, $path) = find_ref($ref_id); if (!defined $repo_id) { die "Could not find a \"svn-remote.*.fetch\" key ", "in the repository configuration matching: ", "refs/remotes/$ref_id\n"; } } my $self = _new($class, $repo_id, $ref_id, $path); if (!defined $self->{path} || !length $self->{path}) { my $fetch = command_oneline('config', '--get', "svn-remote.$repo_id.fetch", ":refs/remotes/$ref_id\$") or die "Failed to read \"svn-remote.$repo_id.fetch\" ", "\":refs/remotes/$ref_id\$\" in config\n"; ($self->{path}, undef) = split(/\s*:\s*/, $fetch); } $self->{url} = command_oneline('config', '--get', "svn-remote.$repo_id.url") or die "Failed to read \"svn-remote.$repo_id.url\" in config\n"; $self->rebuild; $self; } sub refname { my ($refname) = "refs/remotes/$_[0]->{ref_id}" ; # It cannot end with a slash /, we'll throw up on this because # SVN can't have directories with a slash in their name, either: if ($refname =~ m{/$}) { die "ref: '$refname' ends with a trailing slash, this is ", "not permitted by git nor Subversion\n"; } # It cannot have ASCII control character space, tilde ~, caret ^, # colon :, question-mark ?, asterisk *, space, or open bracket [ # anywhere. # # Additionally, % must be escaped because it is used for escaping # and we want our escaped refname to be reversible $refname =~ s{([ \%~\^:\?\*\[\t])}{uc sprintf('%%%02x',ord($1))}eg; # no slash-separated component can begin with a dot . # /.* becomes /%2E* $refname =~ s{/\.}{/%2E}g; # It cannot have two consecutive dots .. anywhere # .. becomes %2E%2E $refname =~ s{\.\.}{%2E%2E}g; return $refname; } sub desanitize_refname { my ($refname) = @_; $refname =~ s{%(?:([0-9A-F]{2}))}{chr hex($1)}eg; return $refname; } sub svm_uuid { my ($self) = @_; return $self->{svm}->{uuid} if $self->svm; $self->ra; unless ($self->{svm}) { die "SVM UUID not cached, and reading remotely failed\n"; } $self->{svm}->{uuid}; } sub svm { my ($self) = @_; return $self->{svm} if $self->{svm}; my $svm; # see if we have it in our config, first: eval { my $section = "svn-remote.$self->{repo_id}"; $svm = { source => tmp_config('--get', "$section.svm-source"), uuid => tmp_config('--get', "$section.svm-uuid"), replace => tmp_config('--get', "$section.svm-replace"), } }; if ($svm && $svm->{source} && $svm->{uuid} && $svm->{replace}) { $self->{svm} = $svm; } $self->{svm}; } sub _set_svm_vars { my ($self, $ra) = @_; return $ra if $self->svm; my @err = ( "useSvmProps set, but failed to read SVM properties\n", "(svm:source, svm:uuid) ", "from the following URLs:\n" ); sub read_svm_props { my ($self, $ra, $path, $r) = @_; my $props = ($ra->get_dir($path, $r))[2]; my $src = $props->{'svm:source'}; my $uuid = $props->{'svm:uuid'}; return undef if (!$src || !$uuid); chomp($src, $uuid); $uuid =~ m{^[0-9a-f\-]{30,}$} or die "doesn't look right - svm:uuid is '$uuid'\n"; # the '!' is used to mark the repos_root!/relative/path $src =~ s{/?!/?}{/}; $src =~ s{/+$}{}; # no trailing slashes please # username is of no interest $src =~ s{(^[a-z\+]*://)[^/@]*@}{$1}; my $replace = $ra->{url}; $replace .= "/$path" if length $path; my $section = "svn-remote.$self->{repo_id}"; tmp_config("$section.svm-source", $src); tmp_config("$section.svm-replace", $replace); tmp_config("$section.svm-uuid", $uuid); $self->{svm} = { source => $src, uuid => $uuid, replace => $replace }; } my $r = $ra->get_latest_revnum; my $path = $self->{path}; my %tried; while (length $path) { unless ($tried{"$self->{url}/$path"}) { return $ra if $self->read_svm_props($ra, $path, $r); $tried{"$self->{url}/$path"} = 1; } $path =~ s#/?[^/]+$##; } die "Path: '$path' should be ''\n" if $path ne ''; return $ra if $self->read_svm_props($ra, $path, $r); $tried{"$self->{url}/$path"} = 1; if ($ra->{repos_root} eq $self->{url}) { die @err, (map { " $_\n" } keys %tried), "\n"; } # nope, make sure we're connected to the repository root: my $ok; my @tried_b; $path = $ra->{svn_path}; $ra = Git::SVN::Ra->new($ra->{repos_root}); while (length $path) { unless ($tried{"$ra->{url}/$path"}) { $ok = $self->read_svm_props($ra, $path, $r); last if $ok; $tried{"$ra->{url}/$path"} = 1; } $path =~ s#/?[^/]+$##; } die "Path: '$path' should be ''\n" if $path ne ''; $ok ||= $self->read_svm_props($ra, $path, $r); $tried{"$ra->{url}/$path"} = 1; if (!$ok) { die @err, (map { " $_\n" } keys %tried), "\n"; } Git::SVN::Ra->new($self->{url}); } sub svnsync { my ($self) = @_; return $self->{svnsync} if $self->{svnsync}; if ($self->no_metadata) { die "Can't have both 'noMetadata' and ", "'useSvnsyncProps' options set!\n"; } if ($self->rewrite_root) { die "Can't have both 'useSvnsyncProps' and 'rewriteRoot' ", "options set!\n"; } my $svnsync; # see if we have it in our config, first: eval { my $section = "svn-remote.$self->{repo_id}"; $svnsync = { url => tmp_config('--get', "$section.svnsync-url"), uuid => tmp_config('--get', "$section.svnsync-uuid"), } }; if ($svnsync && $svnsync->{url} && $svnsync->{uuid}) { return $self->{svnsync} = $svnsync; } my $err = "useSvnsyncProps set, but failed to read " . "svnsync property: svn:sync-from-"; my $rp = $self->ra->rev_proplist(0); my $url = $rp->{'svn:sync-from-url'} or die $err . "url\n"; $url =~ m{^[a-z\+]+://} or die "doesn't look right - svn:sync-from-url is '$url'\n"; my $uuid = $rp->{'svn:sync-from-uuid'} or die $err . "uuid\n"; $uuid =~ m{^[0-9a-f\-]{30,}$} or die "doesn't look right - svn:sync-from-uuid is '$uuid'\n"; my $section = "svn-remote.$self->{repo_id}"; tmp_config('--add', "$section.svnsync-uuid", $uuid); tmp_config('--add', "$section.svnsync-url", $url); return $self->{svnsync} = { url => $url, uuid => $uuid }; } # this allows us to memoize our SVN::Ra UUID locally and avoid a # remote lookup (useful for 'git svn log'). sub ra_uuid { my ($self) = @_; unless ($self->{ra_uuid}) { my $key = "svn-remote.$self->{repo_id}.uuid"; my $uuid = eval { tmp_config('--get', $key) }; if (!$@ && $uuid && $uuid =~ /^([a-f\d\-]{30,})$/) { $self->{ra_uuid} = $uuid; } else { die "ra_uuid called without URL\n" unless $self->{url}; $self->{ra_uuid} = $self->ra->get_uuid; tmp_config('--add', $key, $self->{ra_uuid}); } } $self->{ra_uuid}; } sub ra { my ($self) = shift; my $ra = Git::SVN::Ra->new($self->{url}); if ($self->use_svm_props && !$self->{svm}) { if ($self->no_metadata) { die "Can't have both 'noMetadata' and ", "'useSvmProps' options set!\n"; } elsif ($self->use_svnsync_props) { die "Can't have both 'useSvnsyncProps' and ", "'useSvmProps' options set!\n"; } $ra = $self->_set_svm_vars($ra); $self->{-want_revprops} = 1; } $ra; } sub rel_path { my ($self) = @_; my $repos_root = $self->ra->{repos_root}; return $self->{path} if ($self->{url} eq $repos_root); my $url = $self->{url} . (length $self->{path} ? "/$self->{path}" : $self->{path}); $url =~ s!^\Q$repos_root\E(?:/+|$)!!g; $url; } # prop_walk(PATH, REV, SUB) # ------------------------- # Recursively traverse PATH at revision REV and invoke SUB for each # directory that contains a SVN property. SUB will be invoked as # follows: &SUB(gs, path, props); where `gs' is this instance of # Git::SVN, `path' the path to the directory where the properties # `props' were found. The `path' will be relative to point of checkout, # that is, if url://repo/trunk is the current Git branch, and that # directory contains a sub-directory `d', SUB will be invoked with `/d/' # as `path' (note the trailing `/'). sub prop_walk { my ($self, $path, $rev, $sub) = @_; my ($dirent, undef, $props) = $self->ra->get_dir($path, $rev); $path =~ s#^/*#/#g; my $p = $path; # Strip the irrelevant part of the path. $p =~ s#^/+\Q$self->{path}\E(/|$)#/#; # Ensure the path is terminated by a `/'. $p =~ s#/*$#/#; # The properties contain all the internal SVN stuff nobody # (usually) cares about. my $interesting_props = 0; foreach (keys %{$props}) { # If it doesn't start with `svn:', it must be a # user-defined property. ++$interesting_props and next if $_ !~ /^svn:/; # FIXME: Fragile, if SVN adds new public properties, # this needs to be updated. ++$interesting_props if /^svn:(?:ignore|keywords|executable |eol-style|mime-type |externals|needs-lock)$/x; } &$sub($self, $p, $props) if $interesting_props; foreach (sort keys %$dirent) { next if $dirent->{$_}->{kind} != $SVN::Node::dir; $self->prop_walk($path . '/' . $_, $rev, $sub); } } sub last_rev { ($_[0]->last_rev_commit)[0] } sub last_commit { ($_[0]->last_rev_commit)[1] } # returns the newest SVN revision number and newest commit SHA1 sub last_rev_commit { my ($self) = @_; if (defined $self->{last_rev} && defined $self->{last_commit}) { return ($self->{last_rev}, $self->{last_commit}); } my $c = ::verify_ref($self->refname.'^0'); if ($c && !$self->use_svm_props && !$self->no_metadata) { my $rev = (::cmt_metadata($c))[1]; if (defined $rev) { ($self->{last_rev}, $self->{last_commit}) = ($rev, $c); return ($rev, $c); } } my $db_path = $self->db_path; unless (-e $db_path) { ($self->{last_rev}, $self->{last_commit}) = (undef, undef); return (undef, undef); } my $offset = -41; # from tail my $rl; open my $fh, '<', $db_path or croak "$db_path not readable: $!\n"; sysseek($fh, $offset, 2); # don't care for errors sysread($fh, $rl, 41) == 41 or return (undef, undef); chomp $rl; while (('0' x40) eq $rl && sysseek($fh, 0, 1) != 0) { $offset -= 41; sysseek($fh, $offset, 2); # don't care for errors sysread($fh, $rl, 41) == 41 or return (undef, undef); chomp $rl; } if ($c && $c ne $rl) { die "$db_path and ", $self->refname, " inconsistent!:\n$c != $rl\n"; } my $rev = sysseek($fh, 0, 1) or croak $!; $rev = ($rev - 41) / 41; close $fh or croak $!; ($self->{last_rev}, $self->{last_commit}) = ($rev, $c); return ($rev, $c); } sub get_fetch_range { my ($self, $min, $max) = @_; $max ||= $self->ra->get_latest_revnum; $min ||= $self->rev_db_max; (++$min, $max); } sub tmp_config { my (@args) = @_; my $old_def_config = "$ENV{GIT_DIR}/svn/config"; my $config = "$ENV{GIT_DIR}/svn/.metadata"; if (! -f $config && -f $old_def_config) { rename $old_def_config, $config or die "Failed rename $old_def_config => $config: $!\n"; } my $old_config = $ENV{GIT_CONFIG}; $ENV{GIT_CONFIG} = $config; $@ = undef; my @ret = eval { unless (-f $config) { mkfile($config); open my $fh, '>', $config or die "Can't open $config: $!\n"; print $fh "; This file is used internally by ", "git-svn\n" or die "Couldn't write to $config: $!\n"; print $fh "; You should not have to edit it\n" or die "Couldn't write to $config: $!\n"; close $fh or die "Couldn't close $config: $!\n"; } command('config', @args); }; my $err = $@; if (defined $old_config) { $ENV{GIT_CONFIG} = $old_config; } else { delete $ENV{GIT_CONFIG}; } die $err if $err; wantarray ? @ret : $ret[0]; } sub tmp_index_do { my ($self, $sub) = @_; my $old_index = $ENV{GIT_INDEX_FILE}; $ENV{GIT_INDEX_FILE} = $self->{index}; $@ = undef; my @ret = eval { my ($dir, $base) = ($self->{index} =~ m#^(.*?)/?([^/]+)$#); mkpath([$dir]) unless -d $dir; &$sub; }; my $err = $@; if (defined $old_index) { $ENV{GIT_INDEX_FILE} = $old_index; } else { delete $ENV{GIT_INDEX_FILE}; } die $err if $err; wantarray ? @ret : $ret[0]; } sub assert_index_clean { my ($self, $treeish) = @_; $self->tmp_index_do(sub { command_noisy('read-tree', $treeish) unless -e $self->{index}; my $x = command_oneline('write-tree'); my ($y) = (command(qw/cat-file commit/, $treeish) =~ /^tree ($::sha1)/mo); return if $y eq $x; warn "Index mismatch: $y != $x\nrereading $treeish\n"; unlink $self->{index} or die "unlink $self->{index}: $!\n"; command_noisy('read-tree', $treeish); $x = command_oneline('write-tree'); if ($y ne $x) { ::fatal "trees ($treeish) $y != $x\n", "Something is seriously wrong..."; } }); } sub get_commit_parents { my ($self, $log_entry) = @_; my (%seen, @ret, @tmp); # legacy support for 'set-tree'; this is only used by set_tree_cb: if (my $ip = $self->{inject_parents}) { if (my $commit = delete $ip->{$log_entry->{revision}}) { push @tmp, $commit; } } if (my $cur = ::verify_ref($self->refname.'^0')) { push @tmp, $cur; } if (my $ipd = $self->{inject_parents_dcommit}) { if (my $commit = delete $ipd->{$log_entry->{revision}}) { push @tmp, @$commit; } } push @tmp, $_ foreach (@{$log_entry->{parents}}, @tmp); while (my $p = shift @tmp) { next if $seen{$p}; $seen{$p} = 1; push @ret, $p; # MAXPARENT is defined to 16 in commit-tree.c: last if @ret >= 16; } if (@tmp) { die "r$log_entry->{revision}: No room for parents:\n\t", join("\n\t", @tmp), "\n"; } @ret; } sub rewrite_root { my ($self) = @_; return $self->{-rewrite_root} if exists $self->{-rewrite_root}; my $k = "svn-remote.$self->{repo_id}.rewriteRoot"; my $rwr = eval { command_oneline(qw/config --get/, $k) }; if ($rwr) { $rwr =~ s#/+$##; if ($rwr !~ m#^[a-z\+]+://#) { die "$rwr is not a valid URL (key: $k)\n"; } } $self->{-rewrite_root} = $rwr; } sub metadata_url { my ($self) = @_; ($self->rewrite_root || $self->{url}) . (length $self->{path} ? '/' . $self->{path} : ''); } sub full_url { my ($self) = @_; $self->{url} . (length $self->{path} ? '/' . $self->{path} : ''); } sub do_git_commit { my ($self, $log_entry) = @_; my $lr = $self->last_rev; if (defined $lr && $lr >= $log_entry->{revision}) { die "Last fetched revision of ", $self->refname, " was r$lr, but we are about to fetch: ", "r$log_entry->{revision}!\n"; } if (my $c = $self->rev_db_get($log_entry->{revision})) { croak "$log_entry->{revision} = $c already exists! ", "Why are we refetching it?\n"; } $ENV{GIT_AUTHOR_NAME} = $ENV{GIT_COMMITTER_NAME} = $log_entry->{name}; $ENV{GIT_AUTHOR_EMAIL} = $ENV{GIT_COMMITTER_EMAIL} = $log_entry->{email}; $ENV{GIT_AUTHOR_DATE} = $ENV{GIT_COMMITTER_DATE} = $log_entry->{date}; my $tree = $log_entry->{tree}; if (!defined $tree) { $tree = $self->tmp_index_do(sub { command_oneline('write-tree') }); } die "Tree is not a valid sha1: $tree\n" if $tree !~ /^$::sha1$/o; my @exec = ('git-commit-tree', $tree); foreach ($self->get_commit_parents($log_entry)) { push @exec, '-p', $_; } defined(my $pid = open3(my $msg_fh, my $out_fh, '>&STDERR', @exec)) or croak $!; print $msg_fh $log_entry->{log} or croak $!; unless ($self->no_metadata) { print $msg_fh "\ngit-svn-id: $log_entry->{metadata}\n" or croak $!; } $msg_fh->flush == 0 or croak $!; close $msg_fh or croak $!; chomp(my $commit = do { local $/; <$out_fh> }); close $out_fh or croak $!; waitpid $pid, 0; croak $? if $?; if ($commit !~ /^$::sha1$/o) { die "Failed to commit, invalid sha1: $commit\n"; } $self->rev_db_set($log_entry->{revision}, $commit, 1); $self->{last_rev} = $log_entry->{revision}; $self->{last_commit} = $commit; print "r$log_entry->{revision}"; if (defined $log_entry->{svm_revision}) { print " (\@$log_entry->{svm_revision})"; $self->rev_db_set($log_entry->{svm_revision}, $commit, 0, $self->svm_uuid); } print " = $commit ($self->{ref_id})\n"; if (defined $_repack && (--$_repack_nr == 0)) { $_repack_nr = $_repack; # repack doesn't use any arguments with spaces in them, does it? print "Running git repack $_repack_flags ...\n"; command_noisy('repack', split(/\s+/, $_repack_flags)); print "Done repacking\n"; } return $commit; } sub match_paths { my ($self, $paths, $r) = @_; return 1 if $self->{path} eq ''; if (my $path = $paths->{"/$self->{path}"}) { return ($path->{action} eq 'D') ? 0 : 1; } $self->{path_regex} ||= qr/^\/\Q$self->{path}\E\//; if (grep /$self->{path_regex}/, keys %$paths) { return 1; } my $c = ''; foreach (split m#/#, $self->{path}) { $c .= "/$_"; next unless ($paths->{$c} && ($paths->{$c}->{action} =~ /^[AR]$/)); if ($self->ra->check_path($self->{path}, $r) == $SVN::Node::dir) { return 1; } } return 0; } sub find_parent_branch { my ($self, $paths, $rev) = @_; return undef unless $self->follow_parent; unless (defined $paths) { my $err_handler = $SVN::Error::handler; $SVN::Error::handler = \&Git::SVN::Ra::skip_unknown_revs; $self->ra->get_log([$self->{path}], $rev, $rev, 0, 1, 1, sub { $paths = Git::SVN::Ra::dup_changed_paths($_[0]) }); $SVN::Error::handler = $err_handler; } return undef unless defined $paths; # look for a parent from another branch: my @b_path_components = split m#/#, $self->rel_path; my @a_path_components; my $i; while (@b_path_components) { $i = $paths->{'/'.join('/', @b_path_components)}; last if $i && defined $i->{copyfrom_path}; unshift(@a_path_components, pop(@b_path_components)); } return undef unless defined $i && defined $i->{copyfrom_path}; my $branch_from = $i->{copyfrom_path}; if (@a_path_components) { print STDERR "branch_from: $branch_from => "; $branch_from .= '/'.join('/', @a_path_components); print STDERR $branch_from, "\n"; } my $r = $i->{copyfrom_rev}; my $repos_root = $self->ra->{repos_root}; my $url = $self->ra->{url}; my $new_url = $repos_root . $branch_from; print STDERR "Found possible branch point: ", "$new_url => ", $self->full_url, ", $r\n"; $branch_from =~ s#^/##; my $gs = Git::SVN->find_by_url($new_url, $repos_root, $branch_from); unless ($gs) { my $ref_id = $self->{ref_id}; $ref_id =~ s/\@\d+$//; $ref_id .= "\@$r"; # just grow a tail if we're not unique enough :x $ref_id .= '-' while find_ref($ref_id); print STDERR "Initializing parent: $ref_id\n"; $gs = Git::SVN->init($new_url, '', $ref_id, $ref_id, 1); } my ($r0, $parent) = $gs->find_rev_before($r, 1); if (!defined $r0 || !defined $parent) { my ($base, $head) = parse_revision_argument(0, $r); if ($base <= $r) { $gs->fetch($base, $r); } ($r0, $parent) = $gs->last_rev_commit; } if (defined $r0 && defined $parent) { print STDERR "Found branch parent: ($self->{ref_id}) $parent\n"; my $ed; if ($self->ra->can_do_switch) { $self->assert_index_clean($parent); print STDERR "Following parent with do_switch\n"; # do_switch works with svn/trunk >= r22312, but that # is not included with SVN 1.4.3 (the latest version # at the moment), so we can't rely on it $self->{last_commit} = $parent; $ed = SVN::Git::Fetcher->new($self); $gs->ra->gs_do_switch($r0, $rev, $gs, $self->full_url, $ed) or die "SVN connection failed somewhere...\n"; } elsif ($self->ra->trees_match($new_url, $r0, $self->full_url, $rev)) { print STDERR "Trees match:\n", " $new_url\@$r0\n", " ${\$self->full_url}\@$rev\n", "Following parent with no changes\n"; $self->tmp_index_do(sub { command_noisy('read-tree', $parent); }); $self->{last_commit} = $parent; } else { print STDERR "Following parent with do_update\n"; $ed = SVN::Git::Fetcher->new($self); $self->ra->gs_do_update($rev, $rev, $self, $ed) or die "SVN connection failed somewhere...\n"; } print STDERR "Successfully followed parent\n"; return $self->make_log_entry($rev, [$parent], $ed); } return undef; } sub do_fetch { my ($self, $paths, $rev) = @_; my $ed; my ($last_rev, @parents); if (my $lc = $self->last_commit) { # we can have a branch that was deleted, then re-added # under the same name but copied from another path, in # which case we'll have multiple parents (we don't # want to break the original ref, nor lose copypath info): if (my $log_entry = $self->find_parent_branch($paths, $rev)) { push @{$log_entry->{parents}}, $lc; return $log_entry; } $ed = SVN::Git::Fetcher->new($self); $last_rev = $self->{last_rev}; $ed->{c} = $lc; @parents = ($lc); } else { $last_rev = $rev; if (my $log_entry = $self->find_parent_branch($paths, $rev)) { return $log_entry; } $ed = SVN::Git::Fetcher->new($self); } unless ($self->ra->gs_do_update($last_rev, $rev, $self, $ed)) { die "SVN connection failed somewhere...\n"; } $self->make_log_entry($rev, \@parents, $ed); } sub get_untracked { my ($self, $ed) = @_; my @out; my $h = $ed->{empty}; foreach (sort keys %$h) { my $act = $h->{$_} ? '+empty_dir' : '-empty_dir'; push @out, " $act: " . uri_encode($_); warn "W: $act: $_\n"; } foreach my $t (qw/dir_prop file_prop/) { $h = $ed->{$t} or next; foreach my $path (sort keys %$h) { my $ppath = $path eq '' ? '.' : $path; foreach my $prop (sort keys %{$h->{$path}}) { next if $SKIP_PROP{$prop}; my $v = $h->{$path}->{$prop}; my $t_ppath_prop = "$t: " . uri_encode($ppath) . ' ' . uri_encode($prop); if (defined $v) { push @out, " +$t_ppath_prop " . uri_encode($v); } else { push @out, " -$t_ppath_prop"; } } } } foreach my $t (qw/absent_file absent_directory/) { $h = $ed->{$t} or next; foreach my $parent (sort keys %$h) { foreach my $path (sort @{$h->{$parent}}) { push @out, " $t: " . uri_encode("$parent/$path"); warn "W: $t: $parent/$path ", "Insufficient permissions?\n"; } } } \@out; } sub parse_svn_date { my $date = shift || return '+0000 1970-01-01 00:00:00'; my ($Y,$m,$d,$H,$M,$S) = ($date =~ /^(\d{4})\-(\d\d)\-(\d\d)T (\d\d)\:(\d\d)\:(\d\d).\d+Z$/x) or croak "Unable to parse date: $date\n"; "+0000 $Y-$m-$d $H:$M:$S"; } sub check_author { my ($author) = @_; if (!defined $author || length $author == 0) { $author = '(no author)'; } if (defined $::_authors && ! defined $::users{$author}) { die "Author: $author not defined in $::_authors file\n"; } $author; } sub make_log_entry { my ($self, $rev, $parents, $ed) = @_; my $untracked = $self->get_untracked($ed); open my $un, '>>', "$self->{dir}/unhandled.log" or croak $!; print $un "r$rev\n" or croak $!; print $un $_, "\n" foreach @$untracked; my %log_entry = ( parents => $parents || [], revision => $rev, log => ''); my $headrev; my $logged = delete $self->{logged_rev_props}; if (!$logged || $self->{-want_revprops}) { my $rp = $self->ra->rev_proplist($rev); foreach (sort keys %$rp) { my $v = $rp->{$_}; if (/^svn:(author|date|log)$/) { $log_entry{$1} = $v; } elsif ($_ eq 'svm:headrev') { $headrev = $v; } else { print $un " rev_prop: ", uri_encode($_), ' ', uri_encode($v), "\n"; } } } else { map { $log_entry{$_} = $logged->{$_} } keys %$logged; } close $un or croak $!; $log_entry{date} = parse_svn_date($log_entry{date}); $log_entry{log} .= "\n"; my $author = $log_entry{author} = check_author($log_entry{author}); my ($name, $email) = defined $::users{$author} ? @{$::users{$author}} : ($author, undef); if (defined $headrev && $self->use_svm_props) { if ($self->rewrite_root) { die "Can't have both 'useSvmProps' and 'rewriteRoot' ", "options set!\n"; } my ($uuid, $r) = $headrev =~ m{^([a-f\d\-]{30,}):(\d+)$}; # we don't want "SVM: initializing mirror for junk" ... return undef if $r == 0; my $svm = $self->svm; if ($uuid ne $svm->{uuid}) { die "UUID mismatch on SVM path:\n", "expected: $svm->{uuid}\n", " got: $uuid\n"; } my $full_url = $self->full_url; $full_url =~ s#^\Q$svm->{replace}\E(/|$)#$svm->{source}$1# or die "Failed to replace '$svm->{replace}' with ", "'$svm->{source}' in $full_url\n"; # throw away username for storing in records remove_username($full_url); $log_entry{metadata} = "$full_url\@$r $uuid"; $log_entry{svm_revision} = $r; $email ||= "$author\@$uuid" } elsif ($self->use_svnsync_props) { my $full_url = $self->svnsync->{url}; $full_url .= "/$self->{path}" if length $self->{path}; remove_username($full_url); my $uuid = $self->svnsync->{uuid}; $log_entry{metadata} = "$full_url\@$rev $uuid"; $email ||= "$author\@$uuid" } else { my $url = $self->metadata_url; remove_username($url); $log_entry{metadata} = "$url\@$rev " . $self->ra->get_uuid; $email ||= "$author\@" . $self->ra->get_uuid; } $log_entry{name} = $name; $log_entry{email} = $email; \%log_entry; } sub fetch { my ($self, $min_rev, $max_rev, @parents) = @_; my ($last_rev, $last_commit) = $self->last_rev_commit; my ($base, $head) = $self->get_fetch_range($min_rev, $max_rev); $self->ra->gs_fetch_loop_common($base, $head, [$self]); } sub set_tree_cb { my ($self, $log_entry, $tree, $rev, $date, $author) = @_; $self->{inject_parents} = { $rev => $tree }; $self->fetch(undef, undef); } sub set_tree { my ($self, $tree) = (shift, shift); my $log_entry = ::get_commit_entry($tree); unless ($self->{last_rev