summaryrefslogtreecommitdiff
path: root/contrib/fast-import
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/fast-import')
-rwxr-xr-xcontrib/fast-import/git-p4216
-rwxr-xr-xcontrib/fast-import/import-directories.perl416
-rwxr-xr-xcontrib/fast-import/import-tars.perl84
-rwxr-xr-xcontrib/fast-import/import-zips.py3
4 files changed, 620 insertions, 99 deletions
diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index b44fbfc9b3..48059d0aa7 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -8,12 +8,10 @@
# License: MIT <http://www.opensource.org/licenses/mit-license.php>
#
-import optparse, sys, os, marshal, popen2, subprocess, shelve
-import tempfile, getopt, sha, os.path, time, platform
+import optparse, sys, os, marshal, subprocess, shelve
+import tempfile, getopt, os.path, time, platform
import re
-from sets import Set;
-
verbose = False
@@ -201,7 +199,7 @@ def isModeExec(mode):
def isModeExecChanged(src_mode, dst_mode):
return isModeExec(src_mode) != isModeExec(dst_mode)
-def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None):
cmd = p4_build_cmd("-G %s" % (cmd))
if verbose:
sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -224,7 +222,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
try:
while True:
entry = marshal.load(p4.stdout)
- result.append(entry)
+ if cb is not None:
+ cb(entry)
+ else:
+ result.append(entry)
except EOFError:
pass
exitCode = p4.wait()
@@ -245,7 +246,22 @@ def p4Cmd(cmd):
def p4Where(depotPath):
if not depotPath.endswith("/"):
depotPath += "/"
- output = p4Cmd("where %s..." % depotPath)
+ depotPath = depotPath + "..."
+ outputList = p4CmdList("where %s" % depotPath)
+ output = None
+ for entry in outputList:
+ if "depotFile" in entry:
+ if entry["depotFile"] == depotPath:
+ output = entry
+ break
+ elif "data" in entry:
+ data = entry.get("data")
+ space = data.find(" ")
+ if data[:space] == depotPath:
+ output = entry
+ break
+ if output == None:
+ return ""
if output["code"] == "error":
return ""
clientPath = ""
@@ -427,13 +443,14 @@ def p4ChangesForPaths(depotPaths, changeRange):
output = p4_read_pipe_lines("changes " + ' '.join (["%s...%s" % (p, changeRange)
for p in depotPaths]))
- changes = []
+ changes = {}
for line in output:
- changeNum = line.split(" ")[1]
- changes.append(int(changeNum))
+ changeNum = int(line.split(" ")[1])
+ changes[changeNum] = True
- changes.sort()
- return changes
+ changelist = changes.keys()
+ changelist.sort()
+ return changelist
class Command:
def __init__(self):
@@ -712,13 +729,10 @@ class P4Submit(Command):
tmpFile.write(submitTemplate + separatorLine + diff + newdiff)
tmpFile.close()
mtime = os.stat(fileName).st_mtime
- defaultEditor = "vi"
- if platform.system() == "Windows":
- defaultEditor = "notepad"
if os.environ.has_key("P4EDITOR"):
editor = os.environ.get("P4EDITOR")
else:
- editor = os.environ.get("EDITOR", defaultEditor);
+ editor = read_pipe("git var GIT_EDITOR")
system(editor + " " + fileName)
response = "y"
@@ -845,8 +859,8 @@ class P4Sync(Command):
self.usage += " //depot/path[@revRange]"
self.silent = False
- self.createdBranches = Set()
- self.committedChanges = Set()
+ self.createdBranches = set()
+ self.committedChanges = set()
self.branch = ""
self.detectBranches = False
self.detectLabels = False
@@ -934,10 +948,84 @@ class P4Sync(Command):
return branches
- ## Should move this out, doesn't use SELF.
- def readP4Files(self, files):
+ # output one file from the P4 stream
+ # - helper for streamP4Files
+
+ def streamOneP4File(self, file, contents):
+ if file["type"] == "apple":
+ print "\nfile %s is a strange apple file that forks. Ignoring" % \
+ file['depotFile']
+ return
+
+ relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
+ if verbose:
+ sys.stderr.write("%s\n" % relPath)
+
+ mode = "644"
+ if isP4Exec(file["type"]):
+ mode = "755"
+ elif file["type"] == "symlink":
+ mode = "120000"
+ # p4 print on a symlink contains "target\n", so strip it off
+ last = contents.pop()
+ last = last[:-1]
+ contents.append(last)
+
+ if self.isWindows and file["type"].endswith("text"):
+ mangled = []
+ for data in contents:
+ data = data.replace("\r\n", "\n")
+ mangled.append(data)
+ contents = mangled
+
+ if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
+ contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents)
+ elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
+ contents = map(lambda text: re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text), contents)
+
+ self.gitStream.write("M %s inline %s\n" % (mode, relPath))
+
+ # total length...
+ length = 0
+ for d in contents:
+ length = length + len(d)
+
+ self.gitStream.write("data %d\n" % length)
+ for d in contents:
+ self.gitStream.write(d)
+ self.gitStream.write("\n")
+
+ def streamOneP4Deletion(self, file):
+ relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
+ if verbose:
+ sys.stderr.write("delete %s\n" % relPath)
+ self.gitStream.write("D %s\n" % relPath)
+
+ # handle another chunk of streaming data
+ def streamP4FilesCb(self, marshalled):
+
+ if marshalled.has_key('depotFile') and self.stream_have_file_info:
+ # start of a new file - output the old one first
+ self.streamOneP4File(self.stream_file, self.stream_contents)
+ self.stream_file = {}
+ self.stream_contents = []
+ self.stream_have_file_info = False
+
+ # pick up the new file information... for the
+ # 'data' field we need to append to our array
+ for k in marshalled.keys():
+ if k == 'data':
+ self.stream_contents.append(marshalled['data'])
+ else:
+ self.stream_file[k] = marshalled[k]
+
+ self.stream_have_file_info = True
+
+ # Stream directly from "p4 files" into "git fast-import"
+ def streamP4Files(self, files):
filesForCommit = []
filesToRead = []
+ filesToDelete = []
for f in files:
includeFile = True
@@ -951,50 +1039,35 @@ class P4Sync(Command):
filesForCommit.append(f)
if f['action'] not in ('delete', 'purge'):
filesToRead.append(f)
+ else:
+ filesToDelete.append(f)
- filedata = []
- if len(filesToRead) > 0:
- filedata = p4CmdList('-x - print',
- stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
- for f in filesToRead]),
- stdin_mode='w+')
-
- if "p4ExitCode" in filedata[0]:
- die("Problems executing p4. Error: [%d]."
- % (filedata[0]['p4ExitCode']));
-
- j = 0;
- contents = {}
- while j < len(filedata):
- stat = filedata[j]
- j += 1
- text = ''
- while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'):
- text += filedata[j]['data']
- del filedata[j]['data']
- j += 1
-
- if not stat.has_key('depotFile'):
- sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
- continue
+ # deleted files...
+ for f in filesToDelete:
+ self.streamOneP4Deletion(f)
- if stat['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
- text = re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text)
- elif stat['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
- text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text)
+ if len(filesToRead) > 0:
+ self.stream_file = {}
+ self.stream_contents = []
+ self.stream_have_file_info = False
- contents[stat['depotFile']] = text
+ # curry self argument
+ def streamP4FilesCbSelf(entry):
+ self.streamP4FilesCb(entry)
- for f in filesForCommit:
- path = f['path']
- if contents.has_key(path):
- f['data'] = contents[path]
+ p4CmdList("-x - print",
+ '\n'.join(['%s#%s' % (f['path'], f['rev'])
+ for f in filesToRead]),
+ cb=streamP4FilesCbSelf)
- return filesForCommit
+ # do the last chunk
+ if self.stream_file.has_key('depotFile'):
+ self.streamOneP4File(self.stream_file, self.stream_contents)
def commit(self, details, files, branch, branchPrefixes, parent = ""):
epoch = details["time"]
author = details["user"]
+ self.branchPrefixes = branchPrefixes
if self.verbose:
print "commit into %s" % branch
@@ -1007,7 +1080,6 @@ class P4Sync(Command):
new_files.append (f)
else:
sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
- files = self.readP4Files(new_files)
self.gitStream.write("commit %s\n" % branch)
# gitStream.write("mark :%s\n" % details["change"])
@@ -1035,33 +1107,7 @@ class P4Sync(Command):
print "parent %s" % parent
self.gitStream.write("from %s\n" % parent)
- for file in files:
- if file["type"] == "apple":
- print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
- continue
-
- relPath = self.stripRepoPath(file['path'], branchPrefixes)
- if file["action"] in ("delete", "purge"):
- self.gitStream.write("D %s\n" % relPath)
- else:
- data = file['data']
-
- mode = "644"
- if isP4Exec(file["type"]):
- mode = "755"
- elif file["type"] == "symlink":
- mode = "120000"
- # p4 print on a symlink contains "target\n", so strip it off
- data = data[:-1]
-
- if self.isWindows and file["type"].endswith("text"):
- data = data.replace("\r\n", "\n")
-
- self.gitStream.write("M %s inline %s\n" % (mode, relPath))
- self.gitStream.write("data %s\n" % len(data))
- self.gitStream.write(data)
- self.gitStream.write("\n")
-
+ self.streamP4Files(new_files)
self.gitStream.write("\n")
change = int(details["change"])
@@ -1126,7 +1172,7 @@ class P4Sync(Command):
s = ''
for (key, val) in self.users.items():
- s += "%s\t%s\n" % (key, val)
+ s += "%s\t%s\n" % (key.expandtabs(1), val.expandtabs(1))
open(self.getUserCacheFilename(), "wb").write(s)
self.userMapFromPerforceServer = True
@@ -1611,7 +1657,7 @@ class P4Sync(Command):
if len(self.changesFile) > 0:
output = open(self.changesFile).readlines()
- changeSet = Set()
+ changeSet = set()
for line in output:
changeSet.add(int(line))
diff --git a/contrib/fast-import/import-directories.perl b/contrib/fast-import/import-directories.perl
new file mode 100755
index 0000000000..5782d80e26
--- /dev/null
+++ b/contrib/fast-import/import-directories.perl
@@ -0,0 +1,416 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se>
+#
+# ------------------------------------------------------------------------
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+#
+# ------------------------------------------------------------------------
+
+=pod
+
+=head1 NAME
+
+import-directories - Import bits and pieces to Git.
+
+=head1 SYNOPSIS
+
+B<import-directories.perl> F<configfile> F<outputfile>
+
+=head1 DESCRIPTION
+
+Script to import arbitrary projects version controlled by the "copy the
+source directory to a new location and edit it there"-version controlled
+projects into version control. Handles projects with arbitrary branching
+and version trees, taking a file describing the inputs and generating a
+file compatible with the L<git-fast-import(1)> format.
+
+=head1 CONFIGURATION FILE
+
+=head2 Format
+
+The configuration file is based on the standard I<.ini> format.
+
+ ; Comments start with semi-colons
+ [section]
+ key=value
+
+Please see below for information on how to escape special characters.
+
+=head2 Global configuration
+
+Global configuration is done in the B<[config]> section, which should be
+the first section in the file. Configuration can be changed by
+repeating configuration sections later on.
+
+ [config]
+ ; configure conversion of CRLFs. "convert" means that all CRLFs
+ ; should be converted into LFs (suitable for the core.autocrlf
+ ; setting set to true in Git). "none" means that all data is
+ ; treated as binary.
+ crlf=convert
+
+=head2 Revision configuration
+
+Each revision that is to be imported is described in three
+sections. Revisions should be defined in topological order, so
+that a revision's parent has always been defined when a new revision
+is introduced. All the sections for one revision must be defined
+before defining the next revision.
+
+Each revision is assigned a unique numerical identifier. The
+numbers do not need to be consecutive, nor monotonically
+increasing.
+
+For instance, if your configuration file contains only the two
+revisions 4711 and 42, where 4711 is the initial commit, the
+only requirement is that 4711 is completely defined before 42.
+
+=pod
+
+=head3 Revision description section
+
+A section whose section name is just an integer gives meta-data
+about the revision.
+
+ [3]
+ ; author sets the author of the revisions
+ author=Peter Krefting <peter@softwolves.pp.se>
+ ; branch sets the branch that the revision should be committed to
+ branch=master
+ ; parent describes the revision that is the parent of this commit
+ ; (optional)
+ parent=1
+ ; merges describes a revision that is merged into this commit
+ ; (optional; can be repeated)
+ merges=2
+ ; selects one file to take the timestamp from
+ ; (optional; if unspecified, the most recent file from the .files
+ ; section is used)
+ timestamp=3/source.c
+
+=head3 Revision contents section
+
+A section whose section name is an integer followed by B<.files>
+describe all the files included in this revision. If a file that
+was available previously is not included in this revision, it will
+be removed.
+
+If an on-disk revision is incomplete, you can point to files from
+a previous revision. There are no restriction as to where the source
+files are located, nor to the names of them.
+
+ [3.files]
+ ; the key is the path inside the repository, the value is the path
+ ; as seen from the importer script.
+ source.c=ver-3.00/source.c
+ source.h=ver-2.99/source.h
+ readme.txt=ver-3.00/introduction to the project.txt
+
+File names are treated as byte strings (but please see below on
+quoting rules), and should be stored in the configuration file in
+the encoding that should be used in the generated repository.
+
+=head3 Revision commit message section
+
+A section whose section name is an integer followed by B<.message>
+gives the commit message. This section is read verbatim, up until
+the beginning of the next section. As such, a commit message may not
+contain a line that begins with an opening square bracket ("[") and
+ends with a closing square bracket ("]"), unless they are surrounded
+by whitespace or other characters.
+
+ [3.message]
+ Implement foobar.
+ ; trailing blank lines are ignored.
+
+=cut
+
+# Globals
+use strict;
+use integer;
+my $crlfmode = 0;
+my @revs;
+my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource);
+my $sectiontype = 0;
+my $rev = 0;
+my $mark = 1;
+
+# Check command line
+if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/)
+{
+ exec('perldoc', $0);
+ exit 1;
+}
+
+# Open configuration
+my $config = $ARGV[0];
+open CFG, '<', $config or die "Cannot open configuration file \"$config\": ";
+
+# Open output
+my $output = $ARGV[1];
+open OUT, '>', $output or die "Cannot create output file \"$output\": ";
+binmode OUT;
+
+LINE: while (my $line = <CFG>)
+{
+ $line =~ s/\r?\n$//;
+ next LINE if $sectiontype != 4 && $line eq '';
+ next LINE if $line =~ /^;/;
+ my $oldsectiontype = $sectiontype;
+ my $oldrev = $rev;
+
+ # Sections
+ if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$")
+ {
+ if ($1 eq 'config')
+ {
+ $sectiontype = 1;
+ }
+ elsif ($3 eq '')
+ {
+ $sectiontype = 2;
+ $rev = $2;
+ # Create a new revision
+ die "Duplicate rev: $line\n " if defined $revmap{$rev};
+ print "Reading revision $rev\n";
+ push @revs, $rev;
+ $revmap{$rev} = $mark ++;
+ $time{$revmap{$rev}} = 0;
+ }
+ elsif ($3 eq '.files')
+ {
+ $sectiontype = 3;
+ $rev = $2;
+ die "Revision mismatch: $line\n " unless $rev == $oldrev;
+ }
+ elsif ($3 eq '.message')
+ {
+ $sectiontype = 4;
+ $rev = $2;
+ die "Revision mismatch: $line\n " unless $rev == $oldrev;
+ }
+ else
+ {
+ die "Internal parse error: $line\n ";
+ }
+ next LINE;
+ }
+
+ # Parse data
+ if ($sectiontype != 4)
+ {
+ # Key and value
+ if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$")
+ {
+ my ($key, $value) = &parsekeyvaluepair($1);
+ # Global configuration
+ if (1 == $sectiontype)
+ {
+ if ($key eq 'crlf')
+ {
+ $crlfmode = 1, next LINE if $value eq 'convert';
+ $crlfmode = 0, next LINE if $value eq 'none';
+ }
+ die "Unknown configuration option: $line\n ";
+ }
+ # Revision specification
+ if (2 == $sectiontype)
+ {
+ my $current = $revmap{$rev};
+ $author{$current} = $value, next LINE if $key eq 'author';
+ $branch{$current} = $value, next LINE if $key eq 'branch';
+ $parent{$current} = $value, next LINE if $key eq 'parent';
+ $timesource{$current} = $value, next LINE if $key eq 'timestamp';
+ push(@{$merges{$current}}, $value), next LINE if $key eq 'merges';
+ die "Unknown revision option: $line\n ";
+ }
+ # Filespecs
+ if (3 == $sectiontype)
+ {
+ # Add the file and create a marker
+ die "File not found: $line\n " unless -f $value;
+ my $current = $revmap{$rev};
+ ${$files{$current}}{$key} = $mark;
+ my $time = &fileblob($value, $crlfmode, $mark ++);
+
+ # Update revision timestamp if more recent than other
+ # files seen, or if this is the file we have selected
+ # to take the time stamp from using the "timestamp"
+ # directive.
+ if ((defined $timesource{$current} && $timesource{$current} eq $value)
+ || $time > $time{$current})
+ {
+ $time{$current} = $time;
+ }
+ }
+ }
+ else
+ {
+ die "Parse error: $line\n ";
+ }
+ }
+ else
+ {
+ # Commit message
+ my $current = $revmap{$rev};
+ if (defined $message{$current})
+ {
+ $message{$current} .= "\n";
+ }
+ $message{$current} .= $line;
+ }
+}
+close CFG;
+
+# Start spewing out data for git-fast-import
+foreach my $commit (@revs)
+{
+ # Progress
+ print OUT "progress Creating revision $commit\n";
+
+ # Create commit header
+ my $mark = $revmap{$commit};
+
+ # Branch and commit id
+ print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n";
+
+ # Author and timestamp
+ die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark};
+ print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n";
+
+ # Commit message
+ die "No message defined for $commit\n" unless defined $message{$mark};
+ my $message = $message{$mark};
+ $message =~ s/\n$//; # Kill trailing empty line
+ print OUT "data ", length($message), "\n", $message, "\n";
+
+ # Parent and any merges
+ print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark};
+ if (defined $merges{$mark})
+ {
+ foreach my $merge (@{$merges{$mark}})
+ {
+ print OUT "merge :", $revmap{$merge}, "\n";
+ }
+ }
+
+ # Output file marks
+ print OUT "deleteall\n"; # start from scratch
+ foreach my $file (sort keys %{$files{$mark}})
+ {
+ print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n";
+ }
+ print OUT "\n";
+}
+
+# Create one file blob
+sub fileblob
+{
+ my ($filename, $crlfmode, $mark) = @_;
+
+ # Import the file
+ print OUT "progress Importing $filename\nblob\nmark :$mark\n";
+ open FILE, '<', $filename or die "Cannot read $filename\n ";
+ binmode FILE;
+ my ($size, $mtime) = (stat(FILE))[7,9];
+ my $file;
+ read FILE, $file, $size;
+ close FILE;
+ $file =~ s/\r\n/\n/g if $crlfmode;
+ print OUT "data ", length($file), "\n", $file, "\n";
+
+ return $mtime;
+}
+
+# Parse a key=value pair
+sub parsekeyvaluepair
+{
+=pod
+
+=head2 Escaping special characters
+
+Key and value strings may be enclosed in quotes, in which case
+whitespace inside the quotes is preserved. Additionally, an equal
+sign may be included in the key by preceeding it with a backslash.
+For example:
+
+ "key1 "=value1
+ key2=" value2"
+ key\=3=value3
+ key4=value=4
+ "key5""=value5
+
+Here the first key is "key1 " (note the trailing white-space) and the
+second value is " value2" (note the leading white-space). The third
+key contains an equal sign "key=3" and so does the fourth value, which
+does not need to be escaped. The fifth key contains a trailing quote,
+which does not need to be escaped since it is inside a surrounding
+quote.
+
+=cut
+ my $pair = shift;
+
+ # Separate key and value by the first non-quoted equal sign
+ my ($key, $value);
+ if ($pair =~ /^(.*[^\\])=(.*)$/)
+ {
+ ($key, $value) = ($1, $2)
+ }
+ else
+ {
+ die "Parse error: $pair\n ";
+ }
+
+ # Unquote and unescape the key and value separately
+ return (&unescape($key), &unescape($value));
+}
+
+# Unquote and unescape
+sub unescape
+{
+ my $string = shift;
+
+ # First remove enclosing quotes. Backslash before the trailing
+ # quote leaves both.
+ if ($string =~ /^"(.*[^\\])"$/)
+ {
+ $string = $1;
+ }
+
+ # Second remove any backslashes inside the unquoted string.
+ # For later: Handle special sequences like \t ?
+ $string =~ s/\\(.)/$1/g;
+
+ return $string;
+}
+
+__END__
+
+=pod
+
+=head1 EXAMPLES
+
+B<import-directories.perl> F<project.import>
+
+=head1 AUTHOR
+
+Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se>
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation.
+
+=cut
diff --git a/contrib/fast-import/import-tars.perl b/contrib/fast-import/import-tars.perl
index 23aeb257b9..95438e1ed4 100755
--- a/contrib/fast-import/import-tars.perl
+++ b/contrib/fast-import/import-tars.perl
@@ -8,19 +8,35 @@
## perl import-tars.perl *.tar.bz2
## git whatchanged import-tars
##
+## Use --metainfo to specify the extension for a meta data file, where
+## import-tars can read the commit message and optionally author and
+## committer information.
+##
+## echo 'This is the commit message' > myfile.tar.bz2.msg
+## perl import-tars.perl --metainfo=msg myfile.tar.bz2
use strict;
-die "usage: import-tars *.tar.{gz,bz2,Z}\n" unless @ARGV;
+use Getopt::Long;
+
+my $metaext = '';
+
+die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,lzma,xz,Z}\n"
+ unless GetOptions('metainfo=s' => \$metaext) && @ARGV;
my $branch_name = 'import-tars';
my $branch_ref = "refs/heads/$branch_name";
-my $committer_name = 'T Ar Creator';
-my $committer_email = 'tar@example.com';
+my $author_name = $ENV{'GIT_AUTHOR_NAME'} || 'T Ar Creator';
+my $author_email = $ENV{'GIT_AUTHOR_EMAIL'} || 'tar@example.com';
+my $committer_name = $ENV{'GIT_COMMITTER_NAME'} || `git config --get user.name`;
+my $committer_email = $ENV{'GIT_COMMITTER_EMAIL'} || `git config --get user.email`;
+
+chomp($committer_name, $committer_email);
open(FI, '|-', 'git', 'fast-import', '--quiet')
or die "Unable to start git fast-import: $!\n";
foreach my $tar_file (@ARGV)
{
+ my $commit_time = time;
$tar_file =~ m,([^/]+)$,;
my $tar_name = $1;
@@ -33,13 +49,16 @@ foreach my $tar_file (@ARGV)
} elsif ($tar_name =~ s/\.tar\.Z$//) {
open(I, '-|', 'uncompress', '-c', $tar_file)
or die "Unable to uncompress -c $tar_file: $!\n";
+ } elsif ($tar_name =~ s/\.(tar\.(lzma|xz)|(tlz|txz))$//) {
+ open(I, '-|', 'xz', '-dc', $tar_file)
+ or die "Unable to xz -dc $tar_file: $!\n";
} elsif ($tar_name =~ s/\.tar$//) {
open(I, $tar_file) or die "Unable to open $tar_file: $!\n";
} else {
die "Unrecognized compression format: $tar_file\n";
}
- my $commit_time = 0;
+ my $author_time = 0;
my $next_mark = 1;
my $have_top_dir = 1;
my ($top_dir, %files);
@@ -77,10 +96,16 @@ foreach my $tar_file (@ARGV)
$mtime = oct $mtime;
next if $typeflag == 5; # directory
- print FI "blob\n", "mark :$next_mark\n", "data $size\n";
- while ($size > 0 && read(I, $_, 512) == 512) {
- print FI substr($_, 0, $size);
- $size -= 512;
+ print FI "blob\n", "mark :$next_mark\n";
+ if ($typeflag == 2) { # symbolic link
+ print FI "data ", length($linkname), "\n", $linkname;
+ $mode = 0120000;
+ } else {
+ print FI "data $size\n";
+ while ($size > 0 && read(I, $_, 512) == 512) {
+ print FI substr($_, 0, $size);
+ $size -= 512;
+ }
}
print FI "\n";
@@ -92,17 +117,49 @@ foreach my $tar_file (@ARGV)
}
$files{$path} = [$next_mark++, $mode];
- $commit_time = $mtime if $mtime > $commit_time;
+ $author_time = $mtime if $mtime > $author_time;
$path =~ m,^([^/]+)/,;
$top_dir = $1 unless $top_dir;
$have_top_dir = 0 if $top_dir ne $1;
}
+ my $commit_msg = "Imported from $tar_file.";
+ my $this_committer_name = $committer_name;
+ my $this_committer_email = $committer_email;
+ my $this_author_name = $author_name;
+ my $this_author_email = $author_email;
+ if ($metaext ne '') {
+ # Optionally read a commit message from <filename.tar>.msg
+ # Add a line on the form "Committer: name <e-mail>" to override
+ # the committer and "Author: name <e-mail>" to override the
+ # author for this tar ball.
+ if (open MSG, '<', "${tar_file}.${metaext}") {
+ my $header_done = 0;
+ $commit_msg = '';
+ while (<MSG>) {
+ if (!$header_done && /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) {
+ $this_committer_name = $1;
+ $this_committer_email = $2;
+ } elsif (!$header_done && /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) {
+ $this_author_name = $1;
+ $this_author_email = $2;
+ } elsif (!$header_done && /^$/) { # empty line ends header.
+ $header_done = 1;
+ } else {
+ $commit_msg .= $_;
+ $header_done = 1;
+ }
+ }
+ close MSG;
+ }
+ }
+
print FI <<EOF;
commit $branch_ref
-committer $committer_name <$committer_email> $commit_time +0000
+author $this_author_name <$this_author_email> $author_time +0000
+committer $this_committer_name <$this_committer_email> $commit_time +0000
data <<END_OF_COMMIT_MESSAGE
-Imported from $tar_file.
+$commit_msg
END_OF_COMMIT_MESSAGE
deleteall
@@ -112,14 +169,15 @@ EOF
{
my ($mark, $mode) = @{$files{$path}};
$path =~ s,^([^/]+)/,, if $have_top_dir;
- printf FI "M %o :%i %s\n", $mode & 0111 ? 0755 : 0644, $mark, $path;
+ $mode = $mode & 0111 ? 0755 : 0644 unless $mode == 0120000;
+ printf FI "M %o :%i %s\n", $mode, $mark, $path;
}
print FI "\n";
print FI <<EOF;
tag $tar_name
from $branch_ref
-tagger $committer_name <$committer_email> $commit_time +0000
+tagger $author_name <$author_email> $author_time +0000
data <<END_OF_TAG_MESSAGE
Package $tar_name
END_OF_TAG_MESSAGE
diff --git a/contrib/fast-import/import-zips.py b/contrib/fast-import/import-zips.py
index c674fa2d1b..7051a83a59 100755
--- a/contrib/fast-import/import-zips.py
+++ b/contrib/fast-import/import-zips.py
@@ -44,7 +44,8 @@ for zipfile in argv[1:]:
common_prefix = name[:name.rfind('/') + 1]
else:
while not name.startswith(common_prefix):
- common_prefix = name[:name.rfind('/') + 1]
+ last_slash = common_prefix[:-1].rfind('/') + 1
+ common_prefix = common_prefix[:last_slash]
mark[name] = ':' + str(next_mark)
next_mark += 1