summaryrefslogtreecommitdiff
path: root/contrib/fast-import
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/fast-import')
-rwxr-xr-xcontrib/fast-import/git-p4181
-rwxr-xr-xcontrib/fast-import/import-directories.perl416
-rwxr-xr-xcontrib/fast-import/import-tars.perl50
3 files changed, 569 insertions, 78 deletions
diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 342529db30..e710219ca5 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -8,12 +8,10 @@
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
-import optparse, sys, os, marshal, popen2, subprocess, shelve
-import tempfile, getopt, sha, os.path, time, platform
+import optparse, sys, os, marshal, subprocess, shelve
+import tempfile, getopt, os.path, time, platform
import re
-from sets import Set;
-
verbose = False
@@ -201,7 +199,7 @@ def isModeExec(mode):
def isModeExecChanged(src_mode, dst_mode):
return isModeExec(src_mode) != isModeExec(dst_mode)
-def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None):
cmd = p4_build_cmd("-G %s" % (cmd))
if verbose:
sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -224,7 +222,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
try:
while True:
entry = marshal.load(p4.stdout)
- result.append(entry)
+ if cb is not None:
+ cb(entry)
+ else:
+ result.append(entry)
except EOFError:
pass
exitCode = p4.wait()
@@ -861,8 +862,8 @@ class P4Sync(Command):
self.usage += " //depot/path[@revRange]"
self.silent = False
- self.createdBranches = Set()
- self.committedChanges = Set()
+ self.createdBranches = set()
+ self.committedChanges = set()
self.branch = ""
self.detectBranches = False
self.detectLabels = False
@@ -950,10 +951,84 @@ class P4Sync(Command):
return branches
- ## Should move this out, doesn't use SELF.
- def readP4Files(self, files):
+ # output one file from the P4 stream
+ # - helper for streamP4Files
+
+ def streamOneP4File(self, file, contents):
+ if file["type"] == "apple":
+ print "\nfile %s is a strange apple file that forks. Ignoring" % \
+ file['depotFile']
+ return
+
+ relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
+ if verbose:
+ sys.stderr.write("%s\n" % relPath)
+
+ mode = "644"
+ if isP4Exec(file["type"]):
+ mode = "755"
+ elif file["type"] == "symlink":
+ mode = "120000"
+ # p4 print on a symlink contains "target\n", so strip it off
+ last = contents.pop()
+ last = last[:-1]
+ contents.append(last)
+
+ if self.isWindows and file["type"].endswith("text"):
+ mangled = []
+ for data in contents:
+ data = data.replace("\r\n", "\n")
+ mangled.append(data)
+ contents = mangled
+
+ if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
+ contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents)
+ elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
+ contents = map(lambda text: re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text), contents)
+
+ self.gitStream.write("M %s inline %s\n" % (mode, relPath))
+
+ # total length...
+ length = 0
+ for d in contents:
+ length = length + len(d)
+
+ self.gitStream.write("data %d\n" % length)
+ for d in contents:
+ self.gitStream.write(d)
+ self.gitStream.write("\n")
+
+ def streamOneP4Deletion(self, file):
+ relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
+ if verbose:
+ sys.stderr.write("delete %s\n" % relPath)
+ self.gitStream.write("D %s\n" % relPath)
+
+ # handle another chunk of streaming data
+ def streamP4FilesCb(self, marshalled):
+
+ if marshalled.has_key('depotFile') and self.stream_have_file_info:
+ # start of a new file - output the old one first
+ self.streamOneP4File(self.stream_file, self.stream_contents)
+ self.stream_file = {}
+ self.stream_contents = []
+ self.stream_have_file_info = False
+
+ # pick up the new file information... for the
+ # 'data' field we need to append to our array
+ for k in marshalled.keys():
+ if k == 'data':
+ self.stream_contents.append(marshalled['data'])
+ else:
+ self.stream_file[k] = marshalled[k]
+
+ self.stream_have_file_info = True
+
+ # Stream directly from "p4 files" into "git fast-import"
+ def streamP4Files(self, files):
filesForCommit = []
filesToRead = []
+ filesToDelete = []
for f in files:
includeFile = True
@@ -967,50 +1042,35 @@ class P4Sync(Command):
filesForCommit.append(f)
if f['action'] not in ('delete', 'purge'):
filesToRead.append(f)
+ else:
+ filesToDelete.append(f)
- filedata = []
- if len(filesToRead) > 0:
- filedata = p4CmdList('-x - print',
- stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
- for f in filesToRead]),
- stdin_mode='w+')
-
- if "p4ExitCode" in filedata[0]:
- die("Problems executing p4. Error: [%d]."
- % (filedata[0]['p4ExitCode']));
-
- j = 0;
- contents = {}
- while j < len(filedata):
- stat = filedata[j]
- j += 1
- text = ''
- while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'):
- text += filedata[j]['data']
- del filedata[j]['data']
- j += 1
-
- if not stat.has_key('depotFile'):
- sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
- continue
+ # deleted files...
+ for f in filesToDelete:
+ self.streamOneP4Deletion(f)
- if stat['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
- text = re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text)
- elif stat['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
- text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text)
+ if len(filesToRead) > 0:
+ self.stream_file = {}
+ self.stream_contents = []
+ self.stream_have_file_info = False
- contents[stat['depotFile']] = text
+ # curry self argument
+ def streamP4FilesCbSelf(entry):
+ self.streamP4FilesCb(entry)
- for f in filesForCommit:
- path = f['path']
- if contents.has_key(path):
- f['data'] = contents[path]
+ p4CmdList("-x - print",
+ '\n'.join(['%s#%s' % (f['path'], f['rev'])
+ for f in filesToRead]),
+ cb=streamP4FilesCbSelf)
- return filesForCommit
+ # do the last chunk
+ if self.stream_file.has_key('depotFile'):
+ self.streamOneP4File(self.stream_file, self.stream_contents)
def commit(self, details, files, branch, branchPrefixes, parent = ""):
epoch = details["time"]
author = details["user"]
+ self.branchPrefixes = branchPrefixes
if self.verbose:
print "commit into %s" % branch
@@ -1023,7 +1083,6 @@ class P4Sync(Command):
new_files.append (f)
else:
sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
- files = self.readP4Files(new_files)
self.gitStream.write("commit %s\n" % branch)
# gitStream.write("mark :%s\n" % details["change"])
@@ -1051,33 +1110,7 @@ class P4Sync(Command):
print "parent %s" % parent
self.gitStream.write("from %s\n" % parent)
- for file in files:
- if file["type"] == "apple":
- print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
- continue
-
- relPath = self.stripRepoPath(file['path'], branchPrefixes)
- if file["action"] in ("delete", "purge"):
- self.gitStream.write("D %s\n" % relPath)
- else:
- data = file['data']
-
- mode = "644"
- if isP4Exec(file["type"]):
- mode = "755"
- elif file["type"] == "symlink":
- mode = "120000"
- # p4 print on a symlink contains "target\n", so strip it off
- data = data[:-1]
-
- if self.isWindows and file["type"].endswith("text"):
- data = data.replace("\r\n", "\n")
-
- self.gitStream.write("M %s inline %s\n" % (mode, relPath))
- self.gitStream.write("data %s\n" % len(data))
- self.gitStream.write(data)
- self.gitStream.write("\n")
-
+ self.streamP4Files(new_files)
self.gitStream.write("\n")
change = int(details["change"])
@@ -1627,7 +1660,7 @@ class P4Sync(Command):
if len(self.changesFile) > 0:
output = open(self.changesFile).readlines()
- changeSet = Set()
+ changeSet = set()
for line in output:
changeSet.add(int(line))
diff --git a/contrib/fast-import/import-directories.perl b/contrib/fast-import/import-directories.perl
new file mode 100755
index 0000000000..5782d80e26
--- /dev/null
+++ b/contrib/fast-import/import-directories.perl
@@ -0,0 +1,416 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
+#
+# ------------------------------------------------------------------------
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+# ------------------------------------------------------------------------
+
+=pod
+
+=head1 NAME
+
+import-directories - Import bits and pieces to Git.
+
+=head1 SYNOPSIS
+
+B<import-directories.perl> F<configfile> F<outputfile>
+
+=head1 DESCRIPTION
+
+Script to import arbitrary projects version controlled by the "copy the
+source directory to a new location and edit it there"-version controlled
+projects into version control. Handles projects with arbitrary branching
+and version trees, taking a file describing the inputs and generating a
+file compatible with the L<git-fast-import(1)> format.
+
+=head1 CONFIGURATION FILE
+
+=head2 Format
+
+The configuration file is based on the standard I<.ini> format.
+
+ ; Comments start with semi-colons
+ [section]
+ key=value
+
+Please see below for information on how to escape special characters.
+
+=head2 Global configuration
+
+Global configuration is done in the B<[config]> section, which should be
+the first section in the file. Configuration can be changed by
+repeating configuration sections later on.
+
+ [config]
+ ; configure conversion of CRLFs. "convert" means that all CRLFs
+ ; should be converted into LFs (suitable for the core.autocrlf
+ ; setting set to true in Git). "none" means that all data is
+ ; treated as binary.
+ crlf=convert
+
+=head2 Revision configuration
+
+Each revision that is to be imported is described in three
+sections. Revisions should be defined in topological order, so
+that a revision's parent has always been defined when a new revision
+is introduced. All the sections for one revision must be defined
+before defining the next revision.
+
+Each revision is assigned a unique numerical identifier. The
+numbers do not need to be consecutive, nor monotonically
+increasing.
+
+For instance, if your configuration file contains only the two
+revisions 4711 and 42, where 4711 is the initial commit, the
+only requirement is that 4711 is completely defined before 42.
+
+=pod
+
+=head3 Revision description section
+
+A section whose section name is just an integer gives meta-data
+about the revision.
+
+ [3]
+ ; author sets the author of the revisions
+ author=Peter Krefting <peter@softwolves.pp.se>
+ ; branch sets the branch that the revision should be committed to
+ branch=master
+ ; parent describes the revision that is the parent of this commit
+ ; (optional)
+ parent=1
+ ; merges describes a revision that is merged into this commit
+ ; (optional; can be repeated)
+ merges=2
+ ; selects one file to take the timestamp from
+ ; (optional; if unspecified, the most recent file from the .files
+ ; section is used)
+ timestamp=3/source.c
+
+=head3 Revision contents section
+
+A section whose section name is an integer followed by B<.files>
+describe all the files included in this revision. If a file that
+was available previously is not included in this revision, it will
+be removed.
+
+If an on-disk revision is incomplete, you can point to files from
+a previous revision. There are no restriction as to where the source
+files are located, nor to the names of them.
+
+ [3.files]
+ ; the key is the path inside the repository, the value is the path
+ ; as seen from the importer script.
+ source.c=ver-3.00/source.c
+ source.h=ver-2.99/source.h
+ readme.txt=ver-3.00/introduction to the project.txt
+
+File names are treated as byte strings (but please see below on
+quoting rules), and should be stored in the configuration file in
+the encoding that should be used in the generated repository.
+
+=head3 Revision commit message section
+
+A section whose section name is an integer followed by B<.message>
+gives the commit message. This section is read verbatim, up until
+the beginning of the next section. As such, a commit message may not
+contain a line that begins with an opening square bracket ("[") and
+ends with a closing square bracket ("]"), unless they are surrounded
+by whitespace or other characters.
+
+ [3.message]
+ Implement foobar.
+ ; trailing blank lines are ignored.
+
+=cut
+
+# Globals
+use strict;
+use integer;
+my $crlfmode = 0;
+my @revs;
+my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
+my $sectiontype = 0;
+my $rev = 0;
+my $mark = 1;
+
+# Check command line
+if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
+{
+ exec('perldoc', $0);
+ exit 1;
+}
+
+# Open configuration
+my $config = $ARGV[0];
+open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
+
+# Open output
+my $output = $ARGV[1];
+open OUT, '>', $output or die "Cannot create output file \"$output\": ";
+binmode OUT;
+
+LINE: while (my $line = <CFG>)
+{
+ $line =~ s/\r?\n$//;
+ next LINE if $sectiontype != 4 && $line eq '';
+ next LINE if $line =~ /^;/;
+ my $oldsectiontype = $sectiontype;
+ my $oldrev = $rev;
+
+ # Sections
+ if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
+ {
+ if ($1 eq 'config')
+ {
+ $sectiontype = 1;
+ }
+ elsif ($3 eq '')
+ {
+ $sectiontype = 2;
+ $rev = $2;
+ # Create a new revision
+ die "Duplicate rev: $line\n " if defined $revmap{$rev};
+ print "Reading revision $rev\n";
+ push @revs, $rev;
+ $revmap{$rev} = $mark ++;
+ $time{$revmap{$rev}} = 0;
+ }
+ elsif ($3 eq '.files')
+ {
+ $sectiontype = 3;
+ $rev = $2;
+ die "Revision mismatch: $line\n " unless $rev == $oldrev;
+ }
+ elsif ($3 eq '.message')
+ {
+ $sectiontype = 4;
+ $rev = $2;
+ die "Revision mismatch: $line\n " unless $rev == $oldrev;
+ }
+ else
+ {
+ die "Internal parse error: $line\n ";
+ }
+ next LINE;
+ }
+
+ # Parse data
+ if ($sectiontype != 4)
+ {
+ # Key and value
+ if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
+ {
+ my ($key, $value) = &parsekeyvaluepair($1);
+ # Global configuration
+ if (1 == $sectiontype)
+ {
+ if ($key eq 'crlf')
+ {
+ $crlfmode = 1, next LINE if $value eq 'convert';
+ $crlfmode = 0, next LINE if $value eq 'none';
+ }
+ die "Unknown configuration option: $line\n ";
+ }
+ # Revision specification
+ if (2 == $sectiontype)
+ {
+ my $current = $revmap{$rev};
+ $author{$current} = $value, next LINE if $key eq 'author';
+ $branch{$current} = $value, next LINE if $key eq 'branch';
+ $parent{$current} = $value, next LINE if $key eq 'parent';
+ $timesource{$current} = $value, next LINE if $key eq 'timestamp';
+ push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
+ die "Unknown revision option: $line\n ";
+ }
+ # Filespecs
+ if (3 == $sectiontype)
+ {
+ # Add the file and create a marker
+ die "File not found: $line\n " unless -f $value;
+ my $current = $revmap{$rev};
+ ${$files{$current}}{$key} = $mark;
+ my $time = &fileblob($value, $crlfmode, $mark ++);
+
+ # Update revision timestamp if more recent than other
+ # files seen, or if this is the file we have selected
+ # to take the time stamp from using the "timestamp"
+ # directive.
+ if ((defined $timesource{$current} && $timesource{$current} eq $value)
+ || $time > $time{$current})
+ {
+ $time{$current} = $time;
+ }
+ }
+ }
+ else
+ {
+ die "Parse error: $line\n ";
+ }
+ }
+ else
+ {
+ # Commit message
+ my $current = $revmap{$rev};
+ if (defined $message{$current})
+ {
+ $message{$current} .= "\n";
+ }
+ $message{$current} .= $line;
+ }
+}
+close CFG;
+
+# Start spewing out data for git-fast-import
+foreach my $commit (@revs)
+{
+ # Progress
+ print OUT "progress Creating revision $commit\n";
+
+ # Create commit header
+ my $mark = $revmap{$commit};
+
+ # Branch and commit id
+ print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
+
+ # Author and timestamp
+ die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
+ print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
+
+ # Commit message
+ die "No message defined for $commit\n" unless defined $message{$mark};
+ my $message = $message{$mark};
+ $message =~ s/\n$//; # Kill trailing empty line
+ print OUT "data ", length($message), "\n", $message, "\n";
+
+ # Parent and any merges
+ print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
+ if (defined $merges{$mark})
+ {
+ foreach my $merge (@{$merges{$mark}})
+ {
+ print OUT "merge :", $revmap{$merge}, "\n";
+ }
+ }
+
+ # Output file marks
+ print OUT "deleteall\n"; # start from scratch
+ foreach my $file (sort keys %{$files{$mark}})
+ {
+ print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
+ }
+ print OUT "\n";
+}
+
+# Create one file blob
+sub fileblob
+{
+ my ($filename, $crlfmode, $mark) = @_;
+
+ # Import the file
+ print OUT "progress Importing $filename\nblob\nmark :$mark\n";
+ open FILE, '<', $filename or die "Cannot read $filename\n ";
+ binmode FILE;
+ my ($size, $mtime) = (stat(FILE))[7,9];
+ my $file;
+ read FILE, $file, $size;
+ close FILE;
+ $file =~ s/\r\n/\n/g if $crlfmode;
+ print OUT "data ", length($file), "\n", $file, "\n";
+
+ return $mtime;
+}
+
+# Parse a key=value pair
+sub parsekeyvaluepair
+{
+=pod
+
+=head2 Escaping special characters
+
+Key and value strings may be enclosed in quotes, in which case
+whitespace inside the quotes is preserved. Additionally, an equal
+sign may be included in the key by preceeding it with a backslash.
+For example:
+
+ "key1 "=value1
+ key2=" value2"
+ key\=3=value3
+ key4=value=4
+ "key5""=value5
+
+Here the first key is "key1 " (note the trailing white-space) and the
+second value is " value2" (note the leading white-space). The third
+key contains an equal sign "key=3" and so does the fourth value, which
+does not need to be escaped. The fifth key contains a trailing quote,
+which does not need to be escaped since it is inside a surrounding
+quote.
+
+=cut
+ my $pair = shift;
+
+ # Separate key and value by the first non-quoted equal sign
+ my ($key, $value);
+ if ($pair =~ /^(.*[^\\])=(.*)$/)
+ {
+ ($key, $value) = ($1, $2)
+ }
+ else
+ {
+ die "Parse error: $pair\n ";
+ }
+
+ # Unquote and unescape the key and value separately
+ return (&unescape($key), &unescape($value));
+}
+
+# Unquote and unescape
+sub unescape
+{
+ my $string = shift;
+
+ # First remove enclosing quotes. Backslash before the trailing
+ # quote leaves both.
+ if ($string =~ /^"(.*[^\\])"$/)
+ {
+ $string = $1;
+ }
+
+ # Second remove any backslashes inside the unquoted string.
+ # For later: Handle special sequences like \t ?
+ $string =~ s/\\(.)/$1/g;
+
+ return $string;
+}
+
+__END__
+
+=pod
+
+=head1 EXAMPLES
+
+B<import-directories.perl> F<project.import>
+
+=head1 AUTHOR
+
+Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation.
+
+=cut
diff --git a/contrib/fast-import/import-tars.perl b/contrib/fast-import/import-tars.perl
index 78e40d2a13..a909716682 100755
--- a/contrib/fast-import/import-tars.perl
+++ b/contrib/fast-import/import-tars.perl
@@ -8,9 +8,20 @@
## perl import-tars.perl *.tar.bz2
## git whatchanged import-tars
##
+## Use --metainfo to specify the extension for a meta data file, where
+## import-tars can read the commit message and optionally author and
+## committer information.
+##
+## echo 'This is the commit message' > myfile.tar.bz2.msg
+## perl import-tars.perl --metainfo=msg myfile.tar.bz2
use strict;
-die "usage: import-tars *.tar.{gz,bz2,Z}\n" unless @ARGV;
+use Getopt::Long;
+
+my $metaext = '';
+
+die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,Z}\n"
+ unless GetOptions('metainfo=s' => \$metaext) && @ARGV;
my $branch_name = 'import-tars';
my $branch_ref = "refs/heads/$branch_name";
@@ -109,12 +120,43 @@ foreach my $tar_file (@ARGV)
$have_top_dir = 0 if $top_dir ne $1;
}
+ my $commit_msg = "Imported from $tar_file.";
+ my $this_committer_name = $committer_name;
+ my $this_committer_email = $committer_email;
+ my $this_author_name = $author_name;
+ my $this_author_email = $author_email;
+ if ($metaext ne '') {
+ # Optionally read a commit message from <filename.tar>.msg
+ # Add a line on the form "Committer: name <e-mail>" to override
+ # the committer and "Author: name <e-mail>" to override the
+ # author for this tar ball.
+ if (open MSG, '<', "${tar_file}.${metaext}") {
+ my $header_done = 0;
+ $commit_msg = '';
+ while (<MSG>) {
+ if (!$header_done && /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) {
+ $this_committer_name = $1;
+ $this_committer_email = $2;
+ } elsif (!$header_done && /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) {
+ $this_author_name = $1;
+ $this_author_email = $2;
+ } elsif (!$header_done && /^$/ { # empty line ends header.
+ $header_done = 1;
+ } else {
+ $commit_msg .= $_;
+ $header_done = 1;
+ }
+ }
+ close MSG;
+ }
+ }
+
print FI <<EOF;
commit $branch_ref
-author $author_name <$author_email> $author_time +0000
-committer $committer_name <$committer_email> $commit_time +0000
+author $this_author_name <$this_author_email> $author_time +0000
+committer $this_committer_name <$this_committer_email> $commit_time +0000
data <<END_OF_COMMIT_MESSAGE
-Imported from $tar_file.
+$commit_msg
END_OF_COMMIT_MESSAGE
deleteall