diff options
Diffstat (limited to 'contrib/remote-helpers/git-remote-bzr')
-rwxr-xr-x | contrib/remote-helpers/git-remote-bzr | 959 |
1 files changed, 959 insertions, 0 deletions
diff --git a/contrib/remote-helpers/git-remote-bzr b/contrib/remote-helpers/git-remote-bzr new file mode 100755 index 0000000000..10300c63d1 --- /dev/null +++ b/contrib/remote-helpers/git-remote-bzr @@ -0,0 +1,959 @@ +#!/usr/bin/env python +# +# Copyright (c) 2012 Felipe Contreras +# + +# +# Just copy to your ~/bin, or anywhere in your $PATH. +# Then you can clone with: +# % git clone bzr::/path/to/bzr/repo/or/url +# +# For example: +# % git clone bzr::$HOME/myrepo +# or +# % git clone bzr::lp:myrepo +# +# If you want to specify which branches you want track (per repo): +# git config remote-bzr.branches 'trunk, devel, test' +# + +import sys + +import bzrlib +if hasattr(bzrlib, "initialize"): + bzrlib.initialize() + +import bzrlib.plugin +bzrlib.plugin.load_plugins() + +import bzrlib.generate_ids +import bzrlib.transport +import bzrlib.errors +import bzrlib.ui +import bzrlib.urlutils +import bzrlib.branch + +import sys +import os +import json +import re +import StringIO +import atexit, shutil, hashlib, urlparse, subprocess + +NAME_RE = re.compile('^([^<>]+)') +AUTHOR_RE = re.compile('^([^<>]+?)? ?<([^<>]*)>$') +EMAIL_RE = re.compile('^([^<>]+[^ \\\t<>])?\\b(?:[ \\t<>]*?)\\b([^ \\t<>]+@[^ \\t<>]+)') +RAW_AUTHOR_RE = re.compile('^(\w+) (.+)? <(.*)> (\d+) ([+-]\d+)') + +def die(msg, *args): + sys.stderr.write('ERROR: %s\n' % (msg % args)) + sys.exit(1) + +def warn(msg, *args): + sys.stderr.write('WARNING: %s\n' % (msg % args)) + +def gittz(tz): + return '%+03d%02d' % (tz / 3600, tz % 3600 / 60) + +def get_config(config): + cmd = ['git', 'config', '--get', config] + process = subprocess.Popen(cmd, stdout=subprocess.PIPE) + output, _ = process.communicate() + return output + +class Marks: + + def __init__(self, path): + self.path = path + self.tips = {} + self.marks = {} + self.rev_marks = {} + self.last_mark = 0 + self.load() + + def load(self): + if not os.path.exists(self.path): + return + + tmp = json.load(open(self.path)) + self.tips = tmp['tips'] + self.marks = tmp['marks'] + self.last_mark = tmp['last-mark'] + + for rev, mark in self.marks.iteritems(): + self.rev_marks[mark] = rev + + def dict(self): + return { 'tips': self.tips, 'marks': self.marks, 'last-mark' : self.last_mark } + + def store(self): + json.dump(self.dict(), open(self.path, 'w')) + + def __str__(self): + return str(self.dict()) + + def from_rev(self, rev): + return self.marks[rev] + + def to_rev(self, mark): + return str(self.rev_marks[mark]) + + def next_mark(self): + self.last_mark += 1 + return self.last_mark + + def get_mark(self, rev): + self.last_mark += 1 + self.marks[rev] = self.last_mark + return self.last_mark + + def is_marked(self, rev): + return rev in self.marks + + def new_mark(self, rev, mark): + self.marks[rev] = mark + self.rev_marks[mark] = rev + self.last_mark = mark + + def get_tip(self, branch): + return self.tips.get(branch, None) + + def set_tip(self, branch, tip): + self.tips[branch] = tip + +class Parser: + + def __init__(self, repo): + self.repo = repo + self.line = self.get_line() + + def get_line(self): + return sys.stdin.readline().strip() + + def __getitem__(self, i): + return self.line.split()[i] + + def check(self, word): + return self.line.startswith(word) + + def each_block(self, separator): + while self.line != separator: + yield self.line + self.line = self.get_line() + + def __iter__(self): + return self.each_block('') + + def next(self): + self.line = self.get_line() + if self.line == 'done': + self.line = None + + def get_mark(self): + i = self.line.index(':') + 1 + return int(self.line[i:]) + + def get_data(self): + if not self.check('data'): + return None + i = self.line.index(' ') + 1 + size = int(self.line[i:]) + return sys.stdin.read(size) + + def get_author(self): + m = RAW_AUTHOR_RE.match(self.line) + if not m: + return None + _, name, email, date, tz = m.groups() + committer = '%s <%s>' % (name, email) + tz = int(tz) + tz = ((tz / 100) * 3600) + ((tz % 100) * 60) + return (committer, int(date), tz) + +def rev_to_mark(rev): + global marks + return marks.from_rev(rev) + +def mark_to_rev(mark): + global marks + return marks.to_rev(mark) + +def fixup_user(user): + name = mail = None + user = user.replace('"', '') + m = AUTHOR_RE.match(user) + if m: + name = m.group(1) + mail = m.group(2).strip() + else: + m = EMAIL_RE.match(user) + if m: + name = m.group(1) + mail = m.group(2) + else: + m = NAME_RE.match(user) + if m: + name = m.group(1).strip() + + if not name: + name = 'unknown' + if not mail: + mail = 'Unknown' + + return '%s <%s>' % (name, mail) + +def get_filechanges(cur, prev): + modified = {} + removed = {} + + changes = cur.changes_from(prev) + + def u(s): + return s.encode('utf-8') + + for path, fid, kind in changes.added: + modified[u(path)] = fid + for path, fid, kind in changes.removed: + removed[u(path)] = None + for path, fid, kind, mod, _ in changes.modified: + modified[u(path)] = fid + for oldpath, newpath, fid, kind, mod, _ in changes.renamed: + removed[u(oldpath)] = None + if kind == 'directory': + lst = cur.list_files(from_dir=newpath, recursive=True) + for path, file_class, kind, fid, entry in lst: + if kind != 'directory': + modified[u(newpath + '/' + path)] = fid + else: + modified[u(newpath)] = fid + + return modified, removed + +def export_files(tree, files): + global marks, filenodes + + final = [] + for path, fid in files.iteritems(): + kind = tree.kind(fid) + + h = tree.get_file_sha1(fid) + + if kind == 'symlink': + d = tree.get_symlink_target(fid) + mode = '120000' + elif kind == 'file': + + if tree.is_executable(fid): + mode = '100755' + else: + mode = '100644' + + # is the blob already exported? + if h in filenodes: + mark = filenodes[h] + final.append((mode, mark, path)) + continue + + d = tree.get_file_text(fid) + elif kind == 'directory': + continue + else: + die("Unhandled kind '%s' for path '%s'" % (kind, path)) + + mark = marks.next_mark() + filenodes[h] = mark + + print "blob" + print "mark :%u" % mark + print "data %d" % len(d) + print d + + final.append((mode, mark, path)) + + return final + +def export_branch(repo, name): + global prefix + + ref = '%s/heads/%s' % (prefix, name) + tip = marks.get_tip(name) + + branch = bzrlib.branch.Branch.open(branches[name]) + repo = branch.repository + + branch.lock_read() + revs = branch.iter_merge_sorted_revisions(None, tip, 'exclude', 'forward') + try: + tip_revno = branch.revision_id_to_revno(tip) + last_revno, _ = branch.last_revision_info() + total = last_revno - tip_revno + except bzrlib.errors.NoSuchRevision: + tip_revno = 0 + total = 0 + + for revid, _, seq, _ in revs: + + if marks.is_marked(revid): + continue + + rev = repo.get_revision(revid) + revno = seq[0] + + parents = rev.parent_ids + time = rev.timestamp + tz = rev.timezone + committer = rev.committer.encode('utf-8') + committer = "%s %u %s" % (fixup_user(committer), time, gittz(tz)) + authors = rev.get_apparent_authors() + if authors: + author = authors[0].encode('utf-8') + author = "%s %u %s" % (fixup_user(author), time, gittz(tz)) + else: + author = committer + msg = rev.message.encode('utf-8') + + msg += '\n' + + if len(parents) == 0: + parent = bzrlib.revision.NULL_REVISION + else: + parent = parents[0] + + cur_tree = repo.revision_tree(revid) + prev = repo.revision_tree(parent) + modified, removed = get_filechanges(cur_tree, prev) + + modified_final = export_files(cur_tree, modified) + + if len(parents) == 0: + print 'reset %s' % ref + + print "commit %s" % ref + print "mark :%d" % (marks.get_mark(revid)) + print "author %s" % (author) + print "committer %s" % (committer) + print "data %d" % (len(msg)) + print msg + + for i, p in enumerate(parents): + try: + m = rev_to_mark(p) + except KeyError: + # ghost? + continue + if i == 0: + print "from :%s" % m + else: + print "merge :%s" % m + + for f in removed: + print "D %s" % (f,) + for f in modified_final: + print "M %s :%u %s" % f + print + + if len(seq) > 1: + # let's skip branch revisions from the progress report + continue + + progress = (revno - tip_revno) + if (progress % 100 == 0): + if total: + print "progress revision %d '%s' (%d/%d)" % (revno, name, progress, total) + else: + print "progress revision %d '%s' (%d)" % (revno, name, progress) + + branch.unlock() + + revid = branch.last_revision() + + # make sure the ref is updated + print "reset %s" % ref + print "from :%u" % rev_to_mark(revid) + print + + marks.set_tip(name, revid) + +def export_tag(repo, name): + global tags, prefix + + ref = '%s/tags/%s' % (prefix, name) + print "reset %s" % ref + print "from :%u" % rev_to_mark(tags[name]) + print + +def do_import(parser): + global dirname + + repo = parser.repo + path = os.path.join(dirname, 'marks-git') + + print "feature done" + if os.path.exists(path): + print "feature import-marks=%s" % path + print "feature export-marks=%s" % path + print "feature force" + sys.stdout.flush() + + while parser.check('import'): + ref = parser[1] + if ref.startswith('refs/heads/'): + name = ref[len('refs/heads/'):] + export_branch(repo, name) + if ref.startswith('refs/tags/'): + name = ref[len('refs/tags/'):] + export_tag(repo, name) + parser.next() + + print 'done' + + sys.stdout.flush() + +def parse_blob(parser): + global blob_marks + + parser.next() + mark = parser.get_mark() + parser.next() + data = parser.get_data() + blob_marks[mark] = data + parser.next() + +class CustomTree(): + + def __init__(self, branch, revid, parents, files): + global files_cache + + self.updates = {} + self.branch = branch + + def copy_tree(revid): + files = files_cache[revid] = {} + branch.lock_read() + tree = branch.repository.revision_tree(revid) + try: + for path, entry in tree.iter_entries_by_dir(): + files[path] = [entry.file_id, None] + finally: + branch.unlock() + return files + + if len(parents) == 0: + self.base_id = bzrlib.revision.NULL_REVISION + self.base_files = {} + else: + self.base_id = parents[0] + self.base_files = files_cache.get(self.base_id, None) + if not self.base_files: + self.base_files = copy_tree(self.base_id) + + self.files = files_cache[revid] = self.base_files.copy() + self.rev_files = {} + + for path, data in self.files.iteritems(): + fid, mark = data + self.rev_files[fid] = [path, mark] + + for path, f in files.iteritems(): + fid, mark = self.files.get(path, [None, None]) + if not fid: + fid = bzrlib.generate_ids.gen_file_id(path) + f['path'] = path + self.rev_files[fid] = [path, mark] + self.updates[fid] = f + + def last_revision(self): + return self.base_id + + def iter_changes(self): + changes = [] + + def get_parent(dirname, basename): + parent_fid, mark = self.base_files.get(dirname, [None, None]) + if parent_fid: + return parent_fid + parent_fid, mark = self.files.get(dirname, [None, None]) + if parent_fid: + return parent_fid + if basename == '': + return None + fid = bzrlib.generate_ids.gen_file_id(path) + add_entry(fid, dirname, 'directory') + return fid + + def add_entry(fid, path, kind, mode = None): + dirname, basename = os.path.split(path) + parent_fid = get_parent(dirname, basename) + + executable = False + if mode == '100755': + executable = True + elif mode == '120000': + kind = 'symlink' + + change = (fid, + (None, path), + True, + (False, True), + (None, parent_fid), + (None, basename), + (None, kind), + (None, executable)) + self.files[path] = [change[0], None] + changes.append(change) + + def update_entry(fid, path, kind, mode = None): + dirname, basename = os.path.split(path) + parent_fid = get_parent(dirname, basename) + + executable = False + if mode == '100755': + executable = True + elif mode == '120000': + kind = 'symlink' + + change = (fid, + (path, path), + True, + (True, True), + (None, parent_fid), + (None, basename), + (None, kind), + (None, executable)) + self.files[path] = [change[0], None] + changes.append(change) + + def remove_entry(fid, path, kind): + dirname, basename = os.path.split(path) + parent_fid = get_parent(dirname, basename) + change = (fid, + (path, None), + True, + (True, False), + (parent_fid, None), + (None, None), + (None, None), + (None, None)) + del self.files[path] + changes.append(change) + + for fid, f in self.updates.iteritems(): + path = f['path'] + + if 'deleted' in f: + remove_entry(fid, path, 'file') + continue + + if path in self.base_files: + update_entry(fid, path, 'file', f['mode']) + else: + add_entry(fid, path, 'file', f['mode']) + + self.files[path][1] = f['mark'] + self.rev_files[fid][1] = f['mark'] + + return changes + + def get_content(self, file_id): + path, mark = self.rev_files[file_id] + if mark: + return blob_marks[mark] + + # last resort + tree = self.branch.repository.revision_tree(self.base_id) + return tree.get_file_text(file_id) + + def get_file_with_stat(self, file_id, path=None): + content = self.get_content(file_id) + return (StringIO.StringIO(content), None) + + def get_symlink_target(self, file_id): + return self.get_content(file_id) + + def id2path(self, file_id): + path, mark = self.rev_files[file_id] + return path + +def c_style_unescape(string): + if string[0] == string[-1] == '"': + return string.decode('string-escape')[1:-1] + return string + +def parse_commit(parser): + global marks, blob_marks, parsed_refs + global mode + + parents = [] + + ref = parser[1] + parser.next() + + if ref.startswith('refs/heads/'): + name = ref[len('refs/heads/'):] + branch = bzrlib.branch.Branch.open(branches[name]) + else: + die('unknown ref') + + commit_mark = parser.get_mark() + parser.next() + author = parser.get_author() + parser.next() + committer = parser.get_author() + parser.next() + data = parser.get_data() + parser.next() + if parser.check('from'): + parents.append(parser.get_mark()) + parser.next() + while parser.check('merge'): + parents.append(parser.get_mark()) + parser.next() + + # fast-export adds an extra newline + if data[-1] == '\n': + data = data[:-1] + + files = {} + + for line in parser: + if parser.check('M'): + t, m, mark_ref, path = line.split(' ', 3) + mark = int(mark_ref[1:]) + f = { 'mode' : m, 'mark' : mark } + elif parser.check('D'): + t, path = line.split(' ') + f = { 'deleted' : True } + else: + die('Unknown file command: %s' % line) + path = c_style_unescape(path).decode('utf-8') + files[path] = f + + committer, date, tz = committer + parents = [mark_to_rev(p) for p in parents] + revid = bzrlib.generate_ids.gen_revision_id(committer, date) + props = {} + props['branch-nick'] = branch.nick + + mtree = CustomTree(branch, revid, parents, files) + changes = mtree.iter_changes() + + branch.lock_write() + try: + builder = branch.get_commit_builder(parents, None, date, tz, committer, props, revid) + try: + list(builder.record_iter_changes(mtree, mtree.last_revision(), changes)) + builder.finish_inventory() + builder.commit(data.decode('utf-8', 'replace')) + except Exception, e: + builder.abort() + raise + finally: + branch.unlock() + + parsed_refs[ref] = revid + marks.new_mark(revid, commit_mark) + +def parse_reset(parser): + global parsed_refs + + ref = parser[1] + parser.next() + + # ugh + if parser.check('commit'): + parse_commit(parser) + return + if not parser.check('from'): + return + from_mark = parser.get_mark() + parser.next() + + parsed_refs[ref] = mark_to_rev(from_mark) + +def do_export(parser): + global parsed_refs, dirname + + parser.next() + + for line in parser.each_block('done'): + if parser.check('blob'): + parse_blob(parser) + elif parser.check('commit'): + parse_commit(parser) + elif parser.check('reset'): + parse_reset(parser) + elif parser.check('tag'): + pass + elif parser.check('feature'): + pass + else: + die('unhandled export command: %s' % line) + + for ref, revid in parsed_refs.iteritems(): + if ref.startswith('refs/heads/'): + name = ref[len('refs/heads/'):] + branch = bzrlib.branch.Branch.open(branches[name]) + branch.generate_revision_history(revid, marks.get_tip(name)) + + if name in peers: + peer = bzrlib.branch.Branch.open(peers[name]) + try: + peer.bzrdir.push_branch(branch, revision_id=revid) + except bzrlib.errors.DivergedBranches: + print "error %s non-fast forward" % ref + continue + + try: + wt = branch.bzrdir.open_workingtree() + wt.update() + except bzrlib.errors.NoWorkingTree: + pass + elif ref.startswith('refs/tags/'): + # TODO: implement tag push + print "error %s pushing tags not supported" % ref + continue + else: + # transport-helper/fast-export bugs + continue + + print "ok %s" % ref + + print + +def do_capabilities(parser): + global dirname + + print "import" + print "export" + print "refspec refs/heads/*:%s/heads/*" % prefix + print "refspec refs/tags/*:%s/tags/*" % prefix + + path = os.path.join(dirname, 'marks-git') + + if os.path.exists(path): + print "*import-marks %s" % path + print "*export-marks %s" % path + + print + +def ref_is_valid(name): + return not True in [c in name for c in '~^: \\'] + +def do_list(parser): + global tags + + master_branch = None + + for name in branches: + if not master_branch: + master_branch = name + print "? refs/heads/%s" % name + + branch = bzrlib.branch.Branch.open(branches[master_branch]) + branch.lock_read() + for tag, revid in branch.tags.get_tag_dict().items(): + try: + branch.revision_id_to_dotted_revno(revid) + except bzrlib.errors.NoSuchRevision: + continue + if not ref_is_valid(tag): + continue + print "? refs/tags/%s" % tag + tags[tag] = revid + branch.unlock() + + print "@refs/heads/%s HEAD" % master_branch + print + +def get_remote_branch(origin, remote_branch, name): + global dirname, peers + + branch_path = os.path.join(dirname, 'clone', name) + if os.path.exists(branch_path): + # pull + d = bzrlib.bzrdir.BzrDir.open(branch_path) + branch = d.open_branch() + try: + branch.pull(remote_branch, [], None, False) + except bzrlib.errors.DivergedBranches: + # use remote branch for now + return remote_branch + else: + # clone + d = origin.sprout(branch_path, None, + hardlink=True, create_tree_if_local=False, + force_new_repo=False, + source_branch=remote_branch) + branch = d.open_branch() + + return branch + +def find_branches(repo, wanted): + transport = repo.bzrdir.root_transport + + for fn in transport.iter_files_recursive(): + if not fn.endswith('.bzr/branch-format'): + continue + + name = subdir = fn[:-len('/.bzr/branch-format')] + name = name if name != '' else 'master' + name = name.replace('/', '+') + + if wanted and not name in wanted: + continue + + try: + cur = transport.clone(subdir) + branch = bzrlib.branch.Branch.open_from_transport(cur) + except bzrlib.errors.NotBranchError: + continue + else: + yield name, branch + +def get_repo(url, alias): + global dirname, peer, branches + + normal_url = bzrlib.urlutils.normalize_url(url) + origin = bzrlib.bzrdir.BzrDir.open(url) + is_local = isinstance(origin.transport, bzrlib.transport.local.LocalTransport) + + shared_path = os.path.join(gitdir, 'bzr') + try: + shared_dir = bzrlib.bzrdir.BzrDir.open(shared_path) + except bzrlib.errors.NotBranchError: + shared_dir = bzrlib.bzrdir.BzrDir.create(shared_path) + try: + shared_repo = shared_dir.open_repository() + except bzrlib.errors.NoRepositoryPresent: + shared_repo = shared_dir.create_repository(shared=True) + + if not is_local: + clone_path = os.path.join(dirname, 'clone') + if not os.path.exists(clone_path): + os.mkdir(clone_path) + else: + # check and remove old organization + try: + bdir = bzrlib.bzrdir.BzrDir.open(clone_path) + bdir.destroy_repository() + except bzrlib.errors.NotBranchError: + pass + except bzrlib.errors.NoRepositoryPresent: + pass + + try: + repo = origin.open_repository() + if not repo.user_transport.listable(): + # this repository is not usable for us + raise bzrlib.errors.NoRepositoryPresent(repo.bzrdir) + except bzrlib.errors.NoRepositoryPresent: + # branch + + name = 'master' + remote_branch = origin.open_branch() + + if not is_local: + peers[name] = remote_branch.base + branch = get_remote_branch(origin, remote_branch, name) + else: + branch = remote_branch + + branches[name] = branch.base + + return branch.repository + else: + # repository + + wanted = get_config('remote-bzr.branches').rstrip().split(', ') + # stupid python + wanted = [e for e in wanted if e] + + for name, remote_branch in find_branches(repo, wanted): + + if not is_local: + peers[name] = remote_branch.base + branch = get_remote_branch(origin, remote_branch, name) + else: + branch = remote_branch + + branches[name] = branch.base + + return repo + +def fix_path(alias, orig_url): + url = urlparse.urlparse(orig_url, 'file') + if url.scheme != 'file' or os.path.isabs(url.path): + return + abs_url = urlparse.urljoin("%s/" % os.getcwd(), orig_url) + cmd = ['git', 'config', 'remote.%s.url' % alias, "bzr::%s" % abs_url] + subprocess.call(cmd) + +def main(args): + global marks, prefix, gitdir, dirname + global tags, filenodes + global blob_marks + global parsed_refs + global files_cache + global is_tmp + global branches, peers + + alias = args[1] + url = args[2] + + tags = {} + filenodes = {} + blob_marks = {} + parsed_refs = {} + files_cache = {} + marks = None + branches = {} + peers = {} + + if alias[5:] == url: + is_tmp = True + alias = hashlib.sha1(alias).hexdigest() + else: + is_tmp = False + + prefix = 'refs/bzr/%s' % alias + gitdir = os.environ['GIT_DIR'] + dirname = os.path.join(gitdir, 'bzr', alias) + + if not is_tmp: + fix_path(alias, url) + + if not os.path.exists(dirname): + os.makedirs(dirname) + + if hasattr(bzrlib.ui.ui_factory, 'be_quiet'): + bzrlib.ui.ui_factory.be_quiet(True) + + repo = get_repo(url, alias) + + marks_path = os.path.join(dirname, 'marks-int') + marks = Marks(marks_path) + + parser = Parser(repo) + for line in parser: + if parser.check('capabilities'): + do_capabilities(parser) + elif parser.check('list'): + do_list(parser) + elif parser.check('import'): + do_import(parser) + elif parser.check('export'): + do_export(parser) + else: + die('unhandled command: %s' % line) + sys.stdout.flush() + +def bye(): + if not marks: + return + if not is_tmp: + marks.store() + else: + shutil.rmtree(dirname) + +atexit.register(bye) +sys.exit(main(sys.argv)) |