diff options
Diffstat (limited to 'contrib/fast-import')
-rwxr-xr-x | contrib/fast-import/git-p4 | 181 | ||||
-rwxr-xr-x | contrib/fast-import/import-directories.perl | 416 | ||||
-rwxr-xr-x | contrib/fast-import/import-tars.perl | 50 |
3 files changed, 569 insertions, 78 deletions
diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4 index 342529db30..e710219ca5 100755 --- a/contrib/fast-import/git-p4 +++ b/contrib/fast-import/git-p4 @@ -8,12 +8,10 @@ # License: MIT <http://www.opensource.org/licenses/mit-license.php> # -import optparse, sys, os, marshal, popen2, subprocess, shelve -import tempfile, getopt, sha, os.path, time, platform +import optparse, sys, os, marshal, subprocess, shelve +import tempfile, getopt, os.path, time, platform import re -from sets import Set; - verbose = False @@ -201,7 +199,7 @@ def isModeExec(mode): def isModeExecChanged(src_mode, dst_mode): return isModeExec(src_mode) != isModeExec(dst_mode) -def p4CmdList(cmd, stdin=None, stdin_mode='w+b'): +def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None): cmd = p4_build_cmd("-G %s" % (cmd)) if verbose: sys.stderr.write("Opening pipe: %s\n" % cmd) @@ -224,7 +222,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'): try: while True: entry = marshal.load(p4.stdout) - result.append(entry) + if cb is not None: + cb(entry) + else: + result.append(entry) except EOFError: pass exitCode = p4.wait() @@ -861,8 +862,8 @@ class P4Sync(Command): self.usage += " //depot/path[@revRange]" self.silent = False - self.createdBranches = Set() - self.committedChanges = Set() + self.createdBranches = set() + self.committedChanges = set() self.branch = "" self.detectBranches = False self.detectLabels = False @@ -950,10 +951,84 @@ class P4Sync(Command): return branches - ## Should move this out, doesn't use SELF. - def readP4Files(self, files): + # output one file from the P4 stream + # - helper for streamP4Files + + def streamOneP4File(self, file, contents): + if file["type"] == "apple": + print "\nfile %s is a strange apple file that forks. Ignoring" % \ + file['depotFile'] + return + + relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) + if verbose: + sys.stderr.write("%s\n" % relPath) + + mode = "644" + if isP4Exec(file["type"]): + mode = "755" + elif file["type"] == "symlink": + mode = "120000" + # p4 print on a symlink contains "target\n", so strip it off + last = contents.pop() + last = last[:-1] + contents.append(last) + + if self.isWindows and file["type"].endswith("text"): + mangled = [] + for data in contents: + data = data.replace("\r\n", "\n") + mangled.append(data) + contents = mangled + + if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'): + contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents) + elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'): + contents = map(lambda text: re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text), contents) + + self.gitStream.write("M %s inline %s\n" % (mode, relPath)) + + # total length... + length = 0 + for d in contents: + length = length + len(d) + + self.gitStream.write("data %d\n" % length) + for d in contents: + self.gitStream.write(d) + self.gitStream.write("\n") + + def streamOneP4Deletion(self, file): + relPath = self.stripRepoPath(file['path'], self.branchPrefixes) + if verbose: + sys.stderr.write("delete %s\n" % relPath) + self.gitStream.write("D %s\n" % relPath) + + # handle another chunk of streaming data + def streamP4FilesCb(self, marshalled): + + if marshalled.has_key('depotFile') and self.stream_have_file_info: + # start of a new file - output the old one first + self.streamOneP4File(self.stream_file, self.stream_contents) + self.stream_file = {} + self.stream_contents = [] + self.stream_have_file_info = False + + # pick up the new file information... for the + # 'data' field we need to append to our array + for k in marshalled.keys(): + if k == 'data': + self.stream_contents.append(marshalled['data']) + else: + self.stream_file[k] = marshalled[k] + + self.stream_have_file_info = True + + # Stream directly from "p4 files" into "git fast-import" + def streamP4Files(self, files): filesForCommit = [] filesToRead = [] + filesToDelete = [] for f in files: includeFile = True @@ -967,50 +1042,35 @@ class P4Sync(Command): filesForCommit.append(f) if f['action'] not in ('delete', 'purge'): filesToRead.append(f) + else: + filesToDelete.append(f) - filedata = [] - if len(filesToRead) > 0: - filedata = p4CmdList('-x - print', - stdin='\n'.join(['%s#%s' % (f['path'], f['rev']) - for f in filesToRead]), - stdin_mode='w+') - - if "p4ExitCode" in filedata[0]: - die("Problems executing p4. Error: [%d]." - % (filedata[0]['p4ExitCode'])); - - j = 0; - contents = {} - while j < len(filedata): - stat = filedata[j] - j += 1 - text = '' - while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'): - text += filedata[j]['data'] - del filedata[j]['data'] - j += 1 - - if not stat.has_key('depotFile'): - sys.stderr.write("p4 print fails with: %s\n" % repr(stat)) - continue + # deleted files... + for f in filesToDelete: + self.streamOneP4Deletion(f) - if stat['type'] in ('text+ko', 'unicode+ko', 'binary+ko'): - text = re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text) - elif stat['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'): - text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text) + if len(filesToRead) > 0: + self.stream_file = {} + self.stream_contents = [] + self.stream_have_file_info = False - contents[stat['depotFile']] = text + # curry self argument + def streamP4FilesCbSelf(entry): + self.streamP4FilesCb(entry) - for f in filesForCommit: - path = f['path'] - if contents.has_key(path): - f['data'] = contents[path] + p4CmdList("-x - print", + '\n'.join(['%s#%s' % (f['path'], f['rev']) + for f in filesToRead]), + cb=streamP4FilesCbSelf) - return filesForCommit + # do the last chunk + if self.stream_file.has_key('depotFile'): + self.streamOneP4File(self.stream_file, self.stream_contents) def commit(self, details, files, branch, branchPrefixes, parent = ""): epoch = details["time"] author = details["user"] + self.branchPrefixes = branchPrefixes if self.verbose: print "commit into %s" % branch @@ -1023,7 +1083,6 @@ class P4Sync(Command): new_files.append (f) else: sys.stderr.write("Ignoring file outside of prefix: %s\n" % path) - files = self.readP4Files(new_files) self.gitStream.write("commit %s\n" % branch) # gitStream.write("mark :%s\n" % details["change"]) @@ -1051,33 +1110,7 @@ class P4Sync(Command): print "parent %s" % parent self.gitStream.write("from %s\n" % parent) - for file in files: - if file["type"] == "apple": - print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path'] - continue - - relPath = self.stripRepoPath(file['path'], branchPrefixes) - if file["action"] in ("delete", "purge"): - self.gitStream.write("D %s\n" % relPath) - else: - data = file['data'] - - mode = "644" - if isP4Exec(file["type"]): - mode = "755" - elif file["type"] == "symlink": - mode = "120000" - # p4 print on a symlink contains "target\n", so strip it off - data = data[:-1] - - if self.isWindows and file["type"].endswith("text"): - data = data.replace("\r\n", "\n") - - self.gitStream.write("M %s inline %s\n" % (mode, relPath)) - self.gitStream.write("data %s\n" % len(data)) - self.gitStream.write(data) - self.gitStream.write("\n") - + self.streamP4Files(new_files) self.gitStream.write("\n") change = int(details["change"]) @@ -1627,7 +1660,7 @@ class P4Sync(Command): if len(self.changesFile) > 0: output = open(self.changesFile).readlines() - changeSet = Set() + changeSet = set() for line in output: changeSet.add(int(line)) diff --git a/contrib/fast-import/import-directories.perl b/contrib/fast-import/import-directories.perl new file mode 100755 index 0000000000..5782d80e26 --- /dev/null +++ b/contrib/fast-import/import-directories.perl @@ -0,0 +1,416 @@ +#!/usr/bin/perl -w +# +# Copyright 2008-2009 Peter Krefting <peter@softwolves.pp.se> +# +# ------------------------------------------------------------------------ +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +# +# ------------------------------------------------------------------------ + +=pod + +=head1 NAME + +import-directories - Import bits and pieces to Git. + +=head1 SYNOPSIS + +B<import-directories.perl> F<configfile> F<outputfile> + +=head1 DESCRIPTION + +Script to import arbitrary projects version controlled by the "copy the +source directory to a new location and edit it there"-version controlled +projects into version control. Handles projects with arbitrary branching +and version trees, taking a file describing the inputs and generating a +file compatible with the L<git-fast-import(1)> format. + +=head1 CONFIGURATION FILE + +=head2 Format + +The configuration file is based on the standard I<.ini> format. + + ; Comments start with semi-colons + [section] + key=value + +Please see below for information on how to escape special characters. + +=head2 Global configuration + +Global configuration is done in the B<[config]> section, which should be +the first section in the file. Configuration can be changed by +repeating configuration sections later on. + + [config] + ; configure conversion of CRLFs. "convert" means that all CRLFs + ; should be converted into LFs (suitable for the core.autocrlf + ; setting set to true in Git). "none" means that all data is + ; treated as binary. + crlf=convert + +=head2 Revision configuration + +Each revision that is to be imported is described in three +sections. Revisions should be defined in topological order, so +that a revision's parent has always been defined when a new revision +is introduced. All the sections for one revision must be defined +before defining the next revision. + +Each revision is assigned a unique numerical identifier. The +numbers do not need to be consecutive, nor monotonically +increasing. + +For instance, if your configuration file contains only the two +revisions 4711 and 42, where 4711 is the initial commit, the +only requirement is that 4711 is completely defined before 42. + +=pod + +=head3 Revision description section + +A section whose section name is just an integer gives meta-data +about the revision. + + [3] + ; author sets the author of the revisions + author=Peter Krefting <peter@softwolves.pp.se> + ; branch sets the branch that the revision should be committed to + branch=master + ; parent describes the revision that is the parent of this commit + ; (optional) + parent=1 + ; merges describes a revision that is merged into this commit + ; (optional; can be repeated) + merges=2 + ; selects one file to take the timestamp from + ; (optional; if unspecified, the most recent file from the .files + ; section is used) + timestamp=3/source.c + +=head3 Revision contents section + +A section whose section name is an integer followed by B<.files> +describe all the files included in this revision. If a file that +was available previously is not included in this revision, it will +be removed. + +If an on-disk revision is incomplete, you can point to files from +a previous revision. There are no restriction as to where the source +files are located, nor to the names of them. + + [3.files] + ; the key is the path inside the repository, the value is the path + ; as seen from the importer script. + source.c=ver-3.00/source.c + source.h=ver-2.99/source.h + readme.txt=ver-3.00/introduction to the project.txt + +File names are treated as byte strings (but please see below on +quoting rules), and should be stored in the configuration file in +the encoding that should be used in the generated repository. + +=head3 Revision commit message section + +A section whose section name is an integer followed by B<.message> +gives the commit message. This section is read verbatim, up until +the beginning of the next section. As such, a commit message may not +contain a line that begins with an opening square bracket ("[") and +ends with a closing square bracket ("]"), unless they are surrounded +by whitespace or other characters. + + [3.message] + Implement foobar. + ; trailing blank lines are ignored. + +=cut + +# Globals +use strict; +use integer; +my $crlfmode = 0; +my @revs; +my (%revmap, %message, %files, %author, %branch, %parent, %merges, %time, %timesource); +my $sectiontype = 0; +my $rev = 0; +my $mark = 1; + +# Check command line +if ($#ARGV < 1 || $ARGV[0] =~ /^--?h/) +{ + exec('perldoc', $0); + exit 1; +} + +# Open configuration +my $config = $ARGV[0]; +open CFG, '<', $config or die "Cannot open configuration file \"$config\": "; + +# Open output +my $output = $ARGV[1]; +open OUT, '>', $output or die "Cannot create output file \"$output\": "; +binmode OUT; + +LINE: while (my $line = <CFG>) +{ + $line =~ s/\r?\n$//; + next LINE if $sectiontype != 4 && $line eq ''; + next LINE if $line =~ /^;/; + my $oldsectiontype = $sectiontype; + my $oldrev = $rev; + + # Sections + if ($line =~ m"^\[(config|(\d+)(|\.files|\.message))\]$") + { + if ($1 eq 'config') + { + $sectiontype = 1; + } + elsif ($3 eq '') + { + $sectiontype = 2; + $rev = $2; + # Create a new revision + die "Duplicate rev: $line\n " if defined $revmap{$rev}; + print "Reading revision $rev\n"; + push @revs, $rev; + $revmap{$rev} = $mark ++; + $time{$revmap{$rev}} = 0; + } + elsif ($3 eq '.files') + { + $sectiontype = 3; + $rev = $2; + die "Revision mismatch: $line\n " unless $rev == $oldrev; + } + elsif ($3 eq '.message') + { + $sectiontype = 4; + $rev = $2; + die "Revision mismatch: $line\n " unless $rev == $oldrev; + } + else + { + die "Internal parse error: $line\n "; + } + next LINE; + } + + # Parse data + if ($sectiontype != 4) + { + # Key and value + if ($line =~ m"^\s*([^\s].*=.*[^\s])\s*$") + { + my ($key, $value) = &parsekeyvaluepair($1); + # Global configuration + if (1 == $sectiontype) + { + if ($key eq 'crlf') + { + $crlfmode = 1, next LINE if $value eq 'convert'; + $crlfmode = 0, next LINE if $value eq 'none'; + } + die "Unknown configuration option: $line\n "; + } + # Revision specification + if (2 == $sectiontype) + { + my $current = $revmap{$rev}; + $author{$current} = $value, next LINE if $key eq 'author'; + $branch{$current} = $value, next LINE if $key eq 'branch'; + $parent{$current} = $value, next LINE if $key eq 'parent'; + $timesource{$current} = $value, next LINE if $key eq 'timestamp'; + push(@{$merges{$current}}, $value), next LINE if $key eq 'merges'; + die "Unknown revision option: $line\n "; + } + # Filespecs + if (3 == $sectiontype) + { + # Add the file and create a marker + die "File not found: $line\n " unless -f $value; + my $current = $revmap{$rev}; + ${$files{$current}}{$key} = $mark; + my $time = &fileblob($value, $crlfmode, $mark ++); + + # Update revision timestamp if more recent than other + # files seen, or if this is the file we have selected + # to take the time stamp from using the "timestamp" + # directive. + if ((defined $timesource{$current} && $timesource{$current} eq $value) + || $time > $time{$current}) + { + $time{$current} = $time; + } + } + } + else + { + die "Parse error: $line\n "; + } + } + else + { + # Commit message + my $current = $revmap{$rev}; + if (defined $message{$current}) + { + $message{$current} .= "\n"; + } + $message{$current} .= $line; + } +} +close CFG; + +# Start spewing out data for git-fast-import +foreach my $commit (@revs) +{ + # Progress + print OUT "progress Creating revision $commit\n"; + + # Create commit header + my $mark = $revmap{$commit}; + + # Branch and commit id + print OUT "commit refs/heads/", $branch{$mark}, "\nmark :", $mark, "\n"; + + # Author and timestamp + die "No timestamp defined for $commit (no files?)\n" unless defined $time{$mark}; + print OUT "committer ", $author{$mark}, " ", $time{$mark}, " +0100\n"; + + # Commit message + die "No message defined for $commit\n" unless defined $message{$mark}; + my $message = $message{$mark}; + $message =~ s/\n$//; # Kill trailing empty line + print OUT "data ", length($message), "\n", $message, "\n"; + + # Parent and any merges + print OUT "from :", $revmap{$parent{$mark}}, "\n" if defined $parent{$mark}; + if (defined $merges{$mark}) + { + foreach my $merge (@{$merges{$mark}}) + { + print OUT "merge :", $revmap{$merge}, "\n"; + } + } + + # Output file marks + print OUT "deleteall\n"; # start from scratch + foreach my $file (sort keys %{$files{$mark}}) + { + print OUT "M 644 :", ${$files{$mark}}{$file}, " $file\n"; + } + print OUT "\n"; +} + +# Create one file blob +sub fileblob +{ + my ($filename, $crlfmode, $mark) = @_; + + # Import the file + print OUT "progress Importing $filename\nblob\nmark :$mark\n"; + open FILE, '<', $filename or die "Cannot read $filename\n "; + binmode FILE; + my ($size, $mtime) = (stat(FILE))[7,9]; + my $file; + read FILE, $file, $size; + close FILE; + $file =~ s/\r\n/\n/g if $crlfmode; + print OUT "data ", length($file), "\n", $file, "\n"; + + return $mtime; +} + +# Parse a key=value pair +sub parsekeyvaluepair +{ +=pod + +=head2 Escaping special characters + +Key and value strings may be enclosed in quotes, in which case +whitespace inside the quotes is preserved. Additionally, an equal +sign may be included in the key by preceeding it with a backslash. +For example: + + "key1 "=value1 + key2=" value2" + key\=3=value3 + key4=value=4 + "key5""=value5 + +Here the first key is "key1 " (note the trailing white-space) and the +second value is " value2" (note the leading white-space). The third +key contains an equal sign "key=3" and so does the fourth value, which +does not need to be escaped. The fifth key contains a trailing quote, +which does not need to be escaped since it is inside a surrounding +quote. + +=cut + my $pair = shift; + + # Separate key and value by the first non-quoted equal sign + my ($key, $value); + if ($pair =~ /^(.*[^\\])=(.*)$/) + { + ($key, $value) = ($1, $2) + } + else + { + die "Parse error: $pair\n "; + } + + # Unquote and unescape the key and value separately + return (&unescape($key), &unescape($value)); +} + +# Unquote and unescape +sub unescape +{ + my $string = shift; + + # First remove enclosing quotes. Backslash before the trailing + # quote leaves both. + if ($string =~ /^"(.*[^\\])"$/) + { + $string = $1; + } + + # Second remove any backslashes inside the unquoted string. + # For later: Handle special sequences like \t ? + $string =~ s/\\(.)/$1/g; + + return $string; +} + +__END__ + +=pod + +=head1 EXAMPLES + +B<import-directories.perl> F<project.import> + +=head1 AUTHOR + +Copyright 2008-2009 Peter Krefting E<lt>peter@softwolves.pp.se> + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation. + +=cut diff --git a/contrib/fast-import/import-tars.perl b/contrib/fast-import/import-tars.perl index 78e40d2a13..a909716682 100755 --- a/contrib/fast-import/import-tars.perl +++ b/contrib/fast-import/import-tars.perl @@ -8,9 +8,20 @@ ## perl import-tars.perl *.tar.bz2 ## git whatchanged import-tars ## +## Use --metainfo to specify the extension for a meta data file, where +## import-tars can read the commit message and optionally author and +## committer information. +## +## echo 'This is the commit message' > myfile.tar.bz2.msg +## perl import-tars.perl --metainfo=msg myfile.tar.bz2 use strict; -die "usage: import-tars *.tar.{gz,bz2,Z}\n" unless @ARGV; +use Getopt::Long; + +my $metaext = ''; + +die "usage: import-tars [--metainfo=extension] *.tar.{gz,bz2,Z}\n" + unless GetOptions('metainfo=s' => \$metaext) && @ARGV; my $branch_name = 'import-tars'; my $branch_ref = "refs/heads/$branch_name"; @@ -109,12 +120,43 @@ foreach my $tar_file (@ARGV) $have_top_dir = 0 if $top_dir ne $1; } + my $commit_msg = "Imported from $tar_file."; + my $this_committer_name = $committer_name; + my $this_committer_email = $committer_email; + my $this_author_name = $author_name; + my $this_author_email = $author_email; + if ($metaext ne '') { + # Optionally read a commit message from <filename.tar>.msg + # Add a line on the form "Committer: name <e-mail>" to override + # the committer and "Author: name <e-mail>" to override the + # author for this tar ball. + if (open MSG, '<', "${tar_file}.${metaext}") { + my $header_done = 0; + $commit_msg = ''; + while (<MSG>) { + if (!$header_done && /^Committer:\s+([^<>]*)\s+<(.*)>\s*$/i) { + $this_committer_name = $1; + $this_committer_email = $2; + } elsif (!$header_done && /^Author:\s+([^<>]*)\s+<(.*)>\s*$/i) { + $this_author_name = $1; + $this_author_email = $2; + } elsif (!$header_done && /^$/ { # empty line ends header. + $header_done = 1; + } else { + $commit_msg .= $_; + $header_done = 1; + } + } + close MSG; + } + } + print FI <<EOF; commit $branch_ref -author $author_name <$author_email> $author_time +0000 -committer $committer_name <$committer_email> $commit_time +0000 +author $this_author_name <$this_author_email> $author_time +0000 +committer $this_committer_name <$this_committer_email> $commit_time +0000 data <<END_OF_COMMIT_MESSAGE -Imported from $tar_file. +$commit_msg END_OF_COMMIT_MESSAGE deleteall |