diff options
Diffstat (limited to 'git_remote_helpers/git/git.py')
-rw-r--r-- | git_remote_helpers/git/git.py | 678 |
1 files changed, 678 insertions, 0 deletions
diff --git a/git_remote_helpers/git/git.py b/git_remote_helpers/git/git.py new file mode 100644 index 0000000000..007a1bfdf3 --- /dev/null +++ b/git_remote_helpers/git/git.py @@ -0,0 +1,678 @@ +#!/usr/bin/env python + +"""Functionality for interacting with Git repositories. + +This module provides classes for interfacing with a Git repository. +""" + +import os +import re +import time +from binascii import hexlify +from cStringIO import StringIO +import unittest + +from git_remote_helpers.util import debug, error, die, start_command, run_command + + +def get_git_dir (): + """Return the path to the GIT_DIR for this repo.""" + args = ("git", "rev-parse", "--git-dir") + exit_code, output, errors = run_command(args) + if exit_code: + die("Failed to retrieve git dir") + assert not errors + return output.strip() + + +def parse_git_config (): + """Return a dict containing the parsed version of 'git config -l'.""" + exit_code, output, errors = run_command(("git", "config", "-z", "-l")) + if exit_code: + die("Failed to retrieve git configuration") + assert not errors + return dict([e.split('\n', 1) for e in output.split("\0") if e]) + + +def git_config_bool (value): + """Convert the given git config string value to True or False. + + Raise ValueError if the given string was not recognized as a + boolean value. + + """ + norm_value = str(value).strip().lower() + if norm_value in ("true", "1", "yes", "on", ""): + return True + if norm_value in ("false", "0", "no", "off", "none"): + return False + raise ValueError("Failed to parse '%s' into a boolean value" % (value)) + + +def valid_git_ref (ref_name): + """Return True iff the given ref name is a valid git ref name.""" + # The following is a reimplementation of the git check-ref-format + # command. The rules were derived from the git check-ref-format(1) + # manual page. This code should be replaced by a call to + # check_refname_format() in the git library, when such is available. + if ref_name.endswith('/') or \ + ref_name.startswith('.') or \ + ref_name.count('/.') or \ + ref_name.count('..') or \ + ref_name.endswith('.lock'): + return False + for c in ref_name: + if ord(c) < 0x20 or ord(c) == 0x7f or c in " ~^:?*[": + return False + return True + + +class GitObjectFetcher(object): + + """Provide parsed access to 'git cat-file --batch'. + + This provides a read-only interface to the Git object database. + + """ + + def __init__ (self): + """Initiate a 'git cat-file --batch' session.""" + self.queue = [] # List of object names to be submitted + self.in_transit = None # Object name currently in transit + + # 'git cat-file --batch' produces binary output which is likely + # to be corrupted by the default "rU"-mode pipe opened by + # start_command. (Mode == "rU" does universal new-line + # conversion, which mangles carriage returns.) Therefore, we + # open an explicitly binary-safe pipe for transferring the + # output from 'git cat-file --batch'. + pipe_r_fd, pipe_w_fd = os.pipe() + pipe_r = os.fdopen(pipe_r_fd, "rb") + pipe_w = os.fdopen(pipe_w_fd, "wb") + self.proc = start_command(("git", "cat-file", "--batch"), + stdout = pipe_w) + self.f = pipe_r + + def __del__ (self): + """Verify completed communication with 'git cat-file --batch'.""" + assert not self.queue + assert self.in_transit is None + self.proc.stdin.close() + assert self.proc.wait() == 0 # Zero exit code + assert self.f.read() == "" # No remaining output + + def _submit_next_object (self): + """Submit queue items to the 'git cat-file --batch' process. + + If there are items in the queue, and there is currently no item + currently in 'transit', then pop the first item off the queue, + and submit it. + + """ + if self.queue and self.in_transit is None: + self.in_transit = self.queue.pop(0) + print >> self.proc.stdin, self.in_transit[0] + + def push (self, obj, callback): + """Push the given object name onto the queue. + + The given callback function will at some point in the future + be called exactly once with the following arguments: + - self - this GitObjectFetcher instance + - obj - the object name provided to push() + - sha1 - the SHA1 of the object, if 'None' obj is missing + - t - the type of the object (tag/commit/tree/blob) + - size - the size of the object in bytes + - data - the object contents + + """ + self.queue.append((obj, callback)) + self._submit_next_object() # (Re)start queue processing + + def process_next_entry (self): + """Read the next entry off the queue and invoke callback.""" + obj, cb = self.in_transit + self.in_transit = None + header = self.f.readline() + if header == "%s missing\n" % (obj): + cb(self, obj, None, None, None, None) + return + sha1, t, size = header.split(" ") + assert len(sha1) == 40 + assert t in ("tag", "commit", "tree", "blob") + assert size.endswith("\n") + size = int(size.strip()) + data = self.f.read(size) + assert self.f.read(1) == "\n" + cb(self, obj, sha1, t, size, data) + self._submit_next_object() + + def process (self): + """Process the current queue until empty.""" + while self.in_transit is not None: + self.process_next_entry() + + # High-level convenience methods: + + def get_sha1 (self, objspec): + """Return the SHA1 of the object specified by 'objspec'. + + Return None if 'objspec' does not specify an existing object. + + """ + class _ObjHandler(object): + """Helper class for getting the returned SHA1.""" + def __init__ (self, parser): + self.parser = parser + self.sha1 = None + + def __call__ (self, parser, obj, sha1, t, size, data): + # FIXME: Many unused arguments. Could this be cheaper? + assert parser == self.parser + self.sha1 = sha1 + + handler = _ObjHandler(self) + self.push(objspec, handler) + self.process() + return handler.sha1 + + def open_obj (self, objspec): + """Return a file object wrapping the contents of a named object. + + The caller is responsible for calling .close() on the returned + file object. + + Raise KeyError if 'objspec' does not exist in the repo. + + """ + class _ObjHandler(object): + """Helper class for parsing the returned git object.""" + def __init__ (self, parser): + """Set up helper.""" + self.parser = parser + self.contents = StringIO() + self.err = None + + def __call__ (self, parser, obj, sha1, t, size, data): + """Git object callback (see GitObjectFetcher documentation).""" + assert parser == self.parser + if not sha1: # Missing object + self.err = "Missing object '%s'" % obj + else: + assert size == len(data) + self.contents.write(data) + + handler = _ObjHandler(self) + self.push(objspec, handler) + self.process() + if handler.err: + raise KeyError(handler.err) + handler.contents.seek(0) + return handler.contents + + def walk_tree (self, tree_objspec, callback, prefix = ""): + """Recursively walk the given Git tree object. + + Recursively walk all subtrees of the given tree object, and + invoke the given callback passing three arguments: + (path, mode, data) with the path, permission bits, and contents + of all the blobs found in the entire tree structure. + + """ + class _ObjHandler(object): + """Helper class for walking a git tree structure.""" + def __init__ (self, parser, cb, path, mode = None): + """Set up helper.""" + self.parser = parser + self.cb = cb + self.path = path + self.mode = mode + self.err = None + + def parse_tree (self, treedata): + """Parse tree object data, yield tree entries. + + Each tree entry is a 3-tuple (mode, sha1, path) + + self.path is prepended to all paths yielded + from this method. + + """ + while treedata: + mode = int(treedata[:6], 10) + # Turn 100xxx into xxx + if mode > 100000: + mode -= 100000 + assert treedata[6] == " " + i = treedata.find("\0", 7) + assert i > 0 + path = treedata[7:i] + sha1 = hexlify(treedata[i + 1: i + 21]) + yield (mode, sha1, self.path + path) + treedata = treedata[i + 21:] + + def __call__ (self, parser, obj, sha1, t, size, data): + """Git object callback (see GitObjectFetcher documentation).""" + assert parser == self.parser + if not sha1: # Missing object + self.err = "Missing object '%s'" % (obj) + return + assert size == len(data) + if t == "tree": + if self.path: + self.path += "/" + # Recurse into all blobs and subtrees + for m, s, p in self.parse_tree(data): + parser.push(s, + self.__class__(self.parser, self.cb, p, m)) + elif t == "blob": + self.cb(self.path, self.mode, data) + else: + raise ValueError("Unknown object type '%s'" % (t)) + + self.push(tree_objspec, _ObjHandler(self, callback, prefix)) + self.process() + + +class GitRefMap(object): + + """Map Git ref names to the Git object names they currently point to. + + Behaves like a dictionary of Git ref names -> Git object names. + + """ + + def __init__ (self, obj_fetcher): + """Create a new Git ref -> object map.""" + self.obj_fetcher = obj_fetcher + self._cache = {} # dict: refname -> objname + + def _load (self, ref): + """Retrieve the object currently bound to the given ref. + + The name of the object pointed to by the given ref is stored + into this mapping, and also returned. + + """ + if ref not in self._cache: + self._cache[ref] = self.obj_fetcher.get_sha1(ref) + return self._cache[ref] + + def __contains__ (self, refname): + """Return True if the given refname is present in this cache.""" + return bool(self._load(refname)) + + def __getitem__ (self, refname): + """Return the git object name pointed to by the given refname.""" + commit = self._load(refname) + if commit is None: + raise KeyError("Unknown ref '%s'" % (refname)) + return commit + + def get (self, refname, default = None): + """Return the git object name pointed to by the given refname.""" + commit = self._load(refname) + if commit is None: + return default + return commit + + +class GitFICommit(object): + + """Encapsulate the data in a Git fast-import commit command.""" + + SHA1RE = re.compile(r'^[0-9a-f]{40}$') + + @classmethod + def parse_mode (cls, mode): + """Verify the given git file mode, and return it as a string.""" + assert mode in (644, 755, 100644, 100755, 120000) + return "%i" % (mode) + + @classmethod + def parse_objname (cls, objname): + """Return the given object name (or mark number) as a string.""" + if isinstance(objname, int): # Object name is a mark number + assert objname > 0 + return ":%i" % (objname) + + # No existence check is done, only checks for valid format + assert cls.SHA1RE.match(objname) # Object name is valid SHA1 + return objname + + @classmethod + def quote_path (cls, path): + """Return a quoted version of the given path.""" + path = path.replace("\\", "\\\\") + path = path.replace("\n", "\\n") + path = path.replace('"', '\\"') + return '"%s"' % (path) + + @classmethod + def parse_path (cls, path): + """Verify that the given path is valid, and quote it, if needed.""" + assert not isinstance(path, int) # Cannot be a mark number + + # These checks verify the rules on the fast-import man page + assert not path.count("//") + assert not path.endswith("/") + assert not path.startswith("/") + assert not path.count("/./") + assert not path.count("/../") + assert not path.endswith("/.") + assert not path.endswith("/..") + assert not path.startswith("./") + assert not path.startswith("../") + + if path.count('"') + path.count('\n') + path.count('\\'): + return cls.quote_path(path) + return path + + def __init__ (self, name, email, timestamp, timezone, message): + """Create a new Git fast-import commit, with the given metadata.""" + self.name = name + self.email = email + self.timestamp = timestamp + self.timezone = timezone + self.message = message + self.pathops = [] # List of path operations in this commit + + def modify (self, mode, blobname, path): + """Add a file modification to this Git fast-import commit.""" + self.pathops.append(("M", + self.parse_mode(mode), + self.parse_objname(blobname), + self.parse_path(path))) + + def delete (self, path): + """Add a file deletion to this Git fast-import commit.""" + self.pathops.append(("D", self.parse_path(path))) + + def copy (self, path, newpath): + """Add a file copy to this Git fast-import commit.""" + self.pathops.append(("C", + self.parse_path(path), + self.parse_path(newpath))) + + def rename (self, path, newpath): + """Add a file rename to this Git fast-import commit.""" + self.pathops.append(("R", + self.parse_path(path), + self.parse_path(newpath))) + + def note (self, blobname, commit): + """Add a note object to this Git fast-import commit.""" + self.pathops.append(("N", + self.parse_objname(blobname), + self.parse_objname(commit))) + + def deleteall (self): + """Delete all files in this Git fast-import commit.""" + self.pathops.append("deleteall") + + +class TestGitFICommit(unittest.TestCase): + + """GitFICommit selftests.""" + + def test_basic (self): + """GitFICommit basic selftests.""" + + def expect_fail (method, data): + """Verify that the method(data) raises an AssertionError.""" + try: + method(data) + except AssertionError: + return + raise AssertionError("Failed test for invalid data '%s(%s)'" % + (method.__name__, repr(data))) + + def test_parse_mode (self): + """GitFICommit.parse_mode() selftests.""" + self.assertEqual(GitFICommit.parse_mode(644), "644") + self.assertEqual(GitFICommit.parse_mode(755), "755") + self.assertEqual(GitFICommit.parse_mode(100644), "100644") + self.assertEqual(GitFICommit.parse_mode(100755), "100755") + self.assertEqual(GitFICommit.parse_mode(120000), "120000") + self.assertRaises(AssertionError, GitFICommit.parse_mode, 0) + self.assertRaises(AssertionError, GitFICommit.parse_mode, 123) + self.assertRaises(AssertionError, GitFICommit.parse_mode, 600) + self.assertRaises(AssertionError, GitFICommit.parse_mode, "644") + self.assertRaises(AssertionError, GitFICommit.parse_mode, "abc") + + def test_parse_objname (self): + """GitFICommit.parse_objname() selftests.""" + self.assertEqual(GitFICommit.parse_objname(1), ":1") + self.assertRaises(AssertionError, GitFICommit.parse_objname, 0) + self.assertRaises(AssertionError, GitFICommit.parse_objname, -1) + self.assertEqual(GitFICommit.parse_objname("0123456789" * 4), + "0123456789" * 4) + self.assertEqual(GitFICommit.parse_objname("2468abcdef" * 4), + "2468abcdef" * 4) + self.assertRaises(AssertionError, GitFICommit.parse_objname, + "abcdefghij" * 4) + + def test_parse_path (self): + """GitFICommit.parse_path() selftests.""" + self.assertEqual(GitFICommit.parse_path("foo/bar"), "foo/bar") + self.assertEqual(GitFICommit.parse_path("path/with\n and \" in it"), + '"path/with\\n and \\" in it"') + self.assertRaises(AssertionError, GitFICommit.parse_path, 1) + self.assertRaises(AssertionError, GitFICommit.parse_path, 0) + self.assertRaises(AssertionError, GitFICommit.parse_path, -1) + self.assertRaises(AssertionError, GitFICommit.parse_path, "foo//bar") + self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/") + self.assertRaises(AssertionError, GitFICommit.parse_path, "/foo/bar") + self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/./bar") + self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/../bar") + self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/.") + self.assertRaises(AssertionError, GitFICommit.parse_path, "foo/bar/..") + self.assertRaises(AssertionError, GitFICommit.parse_path, "./foo/bar") + self.assertRaises(AssertionError, GitFICommit.parse_path, "../foo/bar") + + +class GitFastImport(object): + + """Encapsulate communication with git fast-import.""" + + def __init__ (self, f, obj_fetcher, last_mark = 0): + """Set up self to communicate with a fast-import process through f.""" + self.f = f # File object where fast-import stream is written + self.obj_fetcher = obj_fetcher # GitObjectFetcher instance + self.next_mark = last_mark + 1 # Next mark number + self.refs = set() # Keep track of the refnames we've seen + + def comment (self, s): + """Write the given comment in the fast-import stream.""" + assert "\n" not in s, "Malformed comment: '%s'" % (s) + self.f.write("# %s\n" % (s)) + + def commit (self, ref, commitdata): + """Make a commit on the given ref, with the given GitFICommit. + + Return the mark number identifying this commit. + + """ + self.f.write("""\ +commit %(ref)s +mark :%(mark)i +committer %(name)s <%(email)s> %(timestamp)i %(timezone)s +data %(msgLength)i +%(msg)s +""" % { + 'ref': ref, + 'mark': self.next_mark, + 'name': commitdata.name, + 'email': commitdata.email, + 'timestamp': commitdata.timestamp, + 'timezone': commitdata.timezone, + 'msgLength': len(commitdata.message), + 'msg': commitdata.message, +}) + + if ref not in self.refs: + self.refs.add(ref) + parent = ref + "^0" + if self.obj_fetcher.get_sha1(parent): + self.f.write("from %s\n" % (parent)) + + for op in commitdata.pathops: + self.f.write(" ".join(op)) + self.f.write("\n") + self.f.write("\n") + retval = self.next_mark + self.next_mark += 1 + return retval + + def blob (self, data): + """Import the given blob. + + Return the mark number identifying this blob. + + """ + self.f.write("blob\nmark :%i\ndata %i\n%s\n" % + (self.next_mark, len(data), data)) + retval = self.next_mark + self.next_mark += 1 + return retval + + def reset (self, ref, objname): + """Reset the given ref to point at the given Git object.""" + self.f.write("reset %s\nfrom %s\n\n" % + (ref, GitFICommit.parse_objname(objname))) + if ref not in self.refs: + self.refs.add(ref) + + +class GitNotes(object): + + """Encapsulate access to Git notes. + + Simulates a dictionary of object name (SHA1) -> Git note mappings. + + """ + + def __init__ (self, notes_ref, obj_fetcher): + """Create a new Git notes interface, bound to the given notes ref.""" + self.notes_ref = notes_ref + self.obj_fetcher = obj_fetcher # Used to get objects from repo + self.imports = [] # list: (objname, note data blob name) tuples + + def __del__ (self): + """Verify that self.commit_notes() was called before destruction.""" + if self.imports: + error("Missing call to self.commit_notes().") + error("%i notes are not committed!", len(self.imports)) + + def _load (self, objname): + """Return the note data associated with the given git object. + + The note data is returned in string form. If no note is found + for the given object, None is returned. + + """ + try: + f = self.obj_fetcher.open_obj("%s:%s" % (self.notes_ref, objname)) + ret = f.read() + f.close() + except KeyError: + ret = None + return ret + + def __getitem__ (self, objname): + """Return the note contents associated with the given object. + + Raise KeyError if given object has no associated note. + + """ + blobdata = self._load(objname) + if blobdata is None: + raise KeyError("Object '%s' has no note" % (objname)) + return blobdata + + def get (self, objname, default = None): + """Return the note contents associated with the given object. + + Return given default if given object has no associated note. + + """ + blobdata = self._load(objname) + if blobdata is None: + return default + return blobdata + + def import_note (self, objname, data, gfi): + """Tell git fast-import to store data as a note for objname. + + This method uses the given GitFastImport object to create a + blob containing the given note data. Also an entry mapping the + given object name to the created blob is stored until + commit_notes() is called. + + Note that this method only works if it is later followed by a + call to self.commit_notes() (which produces the note commit + that refers to the blob produced here). + + """ + if not data.endswith("\n"): + data += "\n" + gfi.comment("Importing note for object %s" % (objname)) + mark = gfi.blob(data) + self.imports.append((objname, mark)) + + def commit_notes (self, gfi, author, message): + """Produce a git fast-import note commit for the imported notes. + + This method uses the given GitFastImport object to create a + commit on the notes ref, introducing the notes previously + submitted to import_note(). + + """ + if not self.imports: + return + commitdata = GitFICommit(author[0], author[1], + time.time(), "0000", message) + for objname, blobname in self.imports: + assert isinstance(objname, int) and objname > 0 + assert isinstance(blobname, int) and blobname > 0 + commitdata.note(blobname, objname) + gfi.commit(self.notes_ref, commitdata) + self.imports = [] + + +class GitCachedNotes(GitNotes): + + """Encapsulate access to Git notes (cached version). + + Only use this class if no caching is done at a higher level. + + Simulates a dictionary of object name (SHA1) -> Git note mappings. + + """ + + def __init__ (self, notes_ref, obj_fetcher): + """Set up a caching wrapper around GitNotes.""" + GitNotes.__init__(self, notes_ref, obj_fetcher) + self._cache = {} # Cache: object name -> note data + + def __del__ (self): + """Verify that GitNotes' destructor is called.""" + GitNotes.__del__(self) + + def _load (self, objname): + """Extend GitNotes._load() with a local objname -> note cache.""" + if objname not in self._cache: + self._cache[objname] = GitNotes._load(self, objname) + return self._cache[objname] + + def import_note (self, objname, data, gfi): + """Extend GitNotes.import_note() with a local objname -> note cache.""" + if not data.endswith("\n"): + data += "\n" + assert objname not in self._cache + self._cache[objname] = data + GitNotes.import_note(self, objname, data, gfi) + + +if __name__ == '__main__': + unittest.main() |