diff options
Diffstat (limited to 'git-p4.py')
-rwxr-xr-x | git-p4.py | 423 |
1 files changed, 367 insertions, 56 deletions
@@ -22,6 +22,9 @@ import platform import re import shutil import stat +import zipfile +import zlib +import ctypes try: from subprocess import CalledProcessError @@ -43,6 +46,9 @@ verbose = False # Only labels/tags matching this will be imported/exported defaultLabelRegexp = r'[a-zA-Z0-9_\-.]+$' +# Grab changes in blocks of this many revisions, unless otherwise requested +defaultBlockSize = 512 + def p4_build_cmd(cmd): """Build a suitable p4 command line. @@ -101,6 +107,16 @@ def chdir(path, is_client_path=False): path = os.getcwd() os.environ['PWD'] = path +def calcDiskFree(): + """Return free space in bytes on the disk of the given dirname.""" + if platform.system() == 'Windows': + free_bytes = ctypes.c_ulonglong(0) + ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(os.getcwd()), None, None, ctypes.pointer(free_bytes)) + return free_bytes.value + else: + st = os.statvfs(os.getcwd()) + return st.f_bavail * st.f_frsize + def die(msg): if verbose: raise Exception(msg) @@ -131,13 +147,11 @@ def read_pipe(c, ignore_error=False): sys.stderr.write('Reading pipe: %s\n' % str(c)) expand = isinstance(c,basestring) - p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand) - pipe = p.stdout - val = pipe.read() - if p.wait() and not ignore_error: - die('Command failed: %s' % str(c)) - - return val + p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand) + (out, err) = p.communicate() + if p.returncode != 0 and not ignore_error: + die('Command failed: %s\nError: %s' % (str(c), err)) + return out def p4_read_pipe(c, ignore_error=False): real_cmd = p4_build_cmd(c) @@ -249,6 +263,10 @@ def p4_reopen(type, f): def p4_move(src, dest): p4_system(["move", "-k", wildcard_encode(src), wildcard_encode(dest)]) +def p4_last_change(): + results = p4CmdList(["changes", "-m", "1"]) + return int(results[0]['change']) + def p4_describe(change): """Make sure it returns a valid result by checking for the presence of field "time". Return a dict of the @@ -597,9 +615,12 @@ def gitBranchExists(branch): _gitConfig = {} -def gitConfig(key): +def gitConfig(key, typeSpecifier=None): if not _gitConfig.has_key(key): - cmd = [ "git", "config", key ] + cmd = [ "git", "config" ] + if typeSpecifier: + cmd += [ typeSpecifier ] + cmd += [ key ] s = read_pipe(cmd, ignore_error=True) _gitConfig[key] = s.strip() return _gitConfig[key] @@ -610,16 +631,26 @@ def gitConfigBool(key): in the config.""" if not _gitConfig.has_key(key): - cmd = [ "git", "config", "--bool", key ] + _gitConfig[key] = gitConfig(key, '--bool') == "true" + return _gitConfig[key] + +def gitConfigInt(key): + if not _gitConfig.has_key(key): + cmd = [ "git", "config", "--int", key ] s = read_pipe(cmd, ignore_error=True) v = s.strip() - _gitConfig[key] = v == "true" + try: + _gitConfig[key] = int(gitConfig(key, '--int')) + except ValueError: + _gitConfig[key] = None return _gitConfig[key] def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) _gitConfig[key] = s.strip().split(os.linesep) + if _gitConfig[key] == ['']: + _gitConfig[key] = [] return _gitConfig[key] def p4BranchesInGit(branchesAreInRemotes=True): @@ -742,43 +773,77 @@ def createOrUpdateBranchesFromOrigin(localRefPrefix = "refs/remotes/p4/", silent def originP4BranchesExist(): return gitBranchExists("origin") or gitBranchExists("origin/p4") or gitBranchExists("origin/p4/master") -def p4ChangesForPaths(depotPaths, changeRange, block_size): + +def p4ParseNumericChangeRange(parts): + changeStart = int(parts[0][1:]) + if parts[1] == '#head': + changeEnd = p4_last_change() + else: + changeEnd = int(parts[1]) + + return (changeStart, changeEnd) + +def chooseBlockSize(blockSize): + if blockSize: + return blockSize + else: + return defaultBlockSize + +def p4ChangesForPaths(depotPaths, changeRange, requestedBlockSize): assert depotPaths - assert block_size - # Parse the change range into start and end + # Parse the change range into start and end. Try to find integer + # revision ranges as these can be broken up into blocks to avoid + # hitting server-side limits (maxrows, maxscanresults). But if + # that doesn't work, fall back to using the raw revision specifier + # strings, without using block mode. + if changeRange is None or changeRange == '': - changeStart = '@1' - changeEnd = '#head' + changeStart = 1 + changeEnd = p4_last_change() + block_size = chooseBlockSize(requestedBlockSize) else: parts = changeRange.split(',') assert len(parts) == 2 - changeStart = parts[0] - changeEnd = parts[1] + try: + (changeStart, changeEnd) = p4ParseNumericChangeRange(parts) + block_size = chooseBlockSize(requestedBlockSize) + except: + changeStart = parts[0][1:] + changeEnd = parts[1] + if requestedBlockSize: + die("cannot use --changes-block-size with non-numeric revisions") + block_size = None # Accumulate change numbers in a dictionary to avoid duplicates changes = {} for p in depotPaths: # Retrieve changes a block at a time, to prevent running - # into a MaxScanRows error from the server. - start = changeStart - end = changeEnd - get_another_block = True - while get_another_block: - new_changes = [] + # into a MaxResults/MaxScanRows error from the server. + + while True: cmd = ['changes'] - cmd += ['-m', str(block_size)] - cmd += ["%s...%s,%s" % (p, start, end)] + + if block_size: + end = min(changeEnd, changeStart + block_size) + revisionRange = "%d,%d" % (changeStart, end) + else: + revisionRange = "%s,%s" % (changeStart, changeEnd) + + cmd += ["%s...@%s" % (p, revisionRange)] + for line in p4_read_pipe_lines(cmd): changeNum = int(line.split(" ")[1]) - new_changes.append(changeNum) changes[changeNum] = True - if len(new_changes) == block_size: - get_another_block = True - end = '@' + str(min(new_changes)) - else: - get_another_block = False + + if not block_size: + break + + if end >= changeEnd: + break + + changeStart = end + 1 changelist = changes.keys() changelist.sort() @@ -868,6 +933,182 @@ def wildcard_present(path): m = re.search("[*#@%]", path) return m is not None +class LargeFileSystem(object): + """Base class for large file system support.""" + + def __init__(self, writeToGitStream): + self.largeFiles = set() + self.writeToGitStream = writeToGitStream + + def generatePointer(self, cloneDestination, contentFile): + """Return the content of a pointer file that is stored in Git instead of + the actual content.""" + assert False, "Method 'generatePointer' required in " + self.__class__.__name__ + + def pushFile(self, localLargeFile): + """Push the actual content which is not stored in the Git repository to + a server.""" + assert False, "Method 'pushFile' required in " + self.__class__.__name__ + + def hasLargeFileExtension(self, relPath): + return reduce( + lambda a, b: a or b, + [relPath.endswith('.' + e) for e in gitConfigList('git-p4.largeFileExtensions')], + False + ) + + def generateTempFile(self, contents): + contentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + for d in contents: + contentFile.write(d) + contentFile.close() + return contentFile.name + + def exceedsLargeFileThreshold(self, relPath, contents): + if gitConfigInt('git-p4.largeFileThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize > gitConfigInt('git-p4.largeFileThreshold'): + return True + if gitConfigInt('git-p4.largeFileCompressedThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize <= gitConfigInt('git-p4.largeFileCompressedThreshold'): + return False + contentTempFile = self.generateTempFile(contents) + compressedContentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + zf = zipfile.ZipFile(compressedContentFile.name, mode='w') + zf.write(contentTempFile, compress_type=zipfile.ZIP_DEFLATED) + zf.close() + compressedContentsSize = zf.infolist()[0].compress_size + os.remove(contentTempFile) + os.remove(compressedContentFile.name) + if compressedContentsSize > gitConfigInt('git-p4.largeFileCompressedThreshold'): + return True + return False + + def addLargeFile(self, relPath): + self.largeFiles.add(relPath) + + def removeLargeFile(self, relPath): + self.largeFiles.remove(relPath) + + def isLargeFile(self, relPath): + return relPath in self.largeFiles + + def processContent(self, git_mode, relPath, contents): + """Processes the content of git fast import. This method decides if a + file is stored in the large file system and handles all necessary + steps.""" + if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath): + contentTempFile = self.generateTempFile(contents) + (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile) + + # Move temp file to final location in large file system + largeFileDir = os.path.dirname(localLargeFile) + if not os.path.isdir(largeFileDir): + os.makedirs(largeFileDir) + shutil.move(contentTempFile, localLargeFile) + self.addLargeFile(relPath) + if gitConfigBool('git-p4.largeFilePush'): + self.pushFile(localLargeFile) + if verbose: + sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile)) + return (git_mode, contents) + +class MockLFS(LargeFileSystem): + """Mock large file system for testing.""" + + def generatePointer(self, contentFile): + """The pointer content is the original content prefixed with "pointer-". + The local filename of the large file storage is derived from the file content. + """ + with open(contentFile, 'r') as f: + content = next(f) + gitMode = '100644' + pointerContents = 'pointer-' + content + localLargeFile = os.path.join(os.getcwd(), '.git', 'mock-storage', 'local', content[:-1]) + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + """The remote filename of the large file storage is the same as the local + one but in a different directory. + """ + remotePath = os.path.join(os.path.dirname(localLargeFile), '..', 'remote') + if not os.path.exists(remotePath): + os.makedirs(remotePath) + shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) + +class GitLFS(LargeFileSystem): + """Git LFS as backend for the git-p4 large file system. + See https://git-lfs.github.com/ for details.""" + + def __init__(self, *args): + LargeFileSystem.__init__(self, *args) + self.baseGitAttributes = [] + + def generatePointer(self, contentFile): + """Generate a Git LFS pointer for the content. Return LFS Pointer file + mode and content which is stored in the Git repository instead of + the actual content. Return also the new location of the actual + content. + """ + pointerProcess = subprocess.Popen( + ['git', 'lfs', 'pointer', '--file=' + contentFile], + stdout=subprocess.PIPE + ) + pointerFile = pointerProcess.stdout.read() + if pointerProcess.wait(): + os.remove(contentFile) + die('git-lfs pointer command failed. Did you install the extension?') + pointerContents = [i+'\n' for i in pointerFile.split('\n')[2:][:-1]] + oid = pointerContents[1].split(' ')[1].split(':')[1][:-1] + localLargeFile = os.path.join( + os.getcwd(), + '.git', 'lfs', 'objects', oid[:2], oid[2:4], + oid, + ) + # LFS Spec states that pointer files should not have the executable bit set. + gitMode = '100644' + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + uploadProcess = subprocess.Popen( + ['git', 'lfs', 'push', '--object-id', 'origin', os.path.basename(localLargeFile)] + ) + if uploadProcess.wait(): + die('git-lfs push command failed. Did you define a remote?') + + def generateGitAttributes(self): + return ( + self.baseGitAttributes + + [ + '\n', + '#\n', + '# Git LFS (see https://git-lfs.github.com/)\n', + '#\n', + ] + + ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(gitConfigList('git-p4.largeFileExtensions')) + ] + + ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f) + ] + ) + + def addLargeFile(self, relPath): + LargeFileSystem.addLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def removeLargeFile(self, relPath): + LargeFileSystem.removeLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def processContent(self, git_mode, relPath, contents): + if relPath == '.gitattributes': + self.baseGitAttributes = contents + return (git_mode, self.generateGitAttributes()) + else: + return LargeFileSystem.processContent(self, git_mode, relPath, contents) + class Command: def __init__(self): self.usage = "usage: %prog [options]" @@ -1041,6 +1282,9 @@ class P4Submit(Command, P4UserMap): self.p4HasMoveCommand = p4_has_move_command() self.branch = None + if gitConfig('git-p4.largeFileSystem'): + die("Large file system not supported for git-p4 submit command. Please remove it from config.") + def check(self): if len(p4CmdList("opened ...")) > 0: die("You have files opened with perforce! Close them before starting the sync.") @@ -1909,10 +2153,14 @@ class View(object): if "unmap" in res: # it will list all of them, but only one not unmap-ped continue + if gitConfigBool("core.ignorecase"): + res['depotFile'] = res['depotFile'].lower() self.client_spec_path_cache[res['depotFile']] = self.convert_client_path(res["clientFile"]) # not found files or unmap files set to "" for depotFile in fileArgs: + if gitConfigBool("core.ignorecase"): + depotFile = depotFile.lower() if depotFile not in self.client_spec_path_cache: self.client_spec_path_cache[depotFile] = "" @@ -1921,6 +2169,9 @@ class View(object): depot file should live. Returns "" if the file should not be mapped in the client.""" + if gitConfigBool("core.ignorecase"): + depot_path = depot_path.lower() + if depot_path in self.client_spec_path_cache: return self.client_spec_path_cache[depot_path] @@ -1974,7 +2225,7 @@ class P4Sync(Command, P4UserMap): self.syncWithOrigin = True self.importIntoRemotes = True self.maxChanges = "" - self.changes_block_size = 500 + self.changes_block_size = None self.keepRepoPath = False self.depotPaths = None self.p4BranchesInGit = [] @@ -1984,6 +2235,13 @@ class P4Sync(Command, P4UserMap): self.clientSpecDirs = None self.tempBranches = [] self.tempBranchLocation = "git-p4-tmp" + self.largeFileSystem = None + + if gitConfig('git-p4.largeFileSystem'): + largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')] + self.largeFileSystem = largeFileSystemConstructor( + lambda git_mode, relPath, contents: self.writeToGitStream(git_mode, relPath, contents) + ) if gitConfig("git-p4.syncFromOrigin") == "false": self.syncWithOrigin = False @@ -2104,13 +2362,22 @@ class P4Sync(Command, P4UserMap): return branches + def writeToGitStream(self, gitMode, relPath, contents): + self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) + for d in contents: + self.gitStream.write(d) + self.gitStream.write('\n') + # output one file from the P4 stream # - helper for streamP4Files def streamOneP4File(self, file, contents): relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) if verbose: - sys.stderr.write("%s\n" % relPath) + size = int(self.stream_file['fileSize']) + sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024)) + sys.stdout.flush() (type_base, type_mods) = split_p4_type(file["type"]) @@ -2145,10 +2412,17 @@ class P4Sync(Command, P4UserMap): # them back too. This is not needed to the cygwin windows version, # just the native "NT" type. # - text = p4_read_pipe(['print', '-q', '-o', '-', file['depotFile']]) - if p4_version_string().find("/NT") >= 0: - text = text.replace("\r\n", "\n") - contents = [ text ] + try: + text = p4_read_pipe(['print', '-q', '-o', '-', '%s@%s' % (file['depotFile'], file['change'])]) + except Exception as e: + if 'Translation of file content failed' in str(e): + type_base = 'binary' + else: + raise e + else: + if p4_version_string().find('/NT') >= 0: + text = text.replace('\r\n', '\n') + contents = [ text ] if type_base == "apple": # Apple filetype files will be streamed as a concatenation of @@ -2172,24 +2446,31 @@ class P4Sync(Command, P4UserMap): text = regexp.sub(r'$\1$', text) contents = [ text ] - self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) + try: + relPath.decode('ascii') + except: + encoding = 'utf8' + if gitConfig('git-p4.pathEncoding'): + encoding = gitConfig('git-p4.pathEncoding') + relPath = relPath.decode(encoding, 'replace').encode('utf8', 'replace') + if self.verbose: + print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath) - # total length... - length = 0 - for d in contents: - length = length + len(d) + if self.largeFileSystem: + (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) - self.gitStream.write("data %d\n" % length) - for d in contents: - self.gitStream.write(d) - self.gitStream.write("\n") + self.writeToGitStream(git_mode, relPath, contents) def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) if verbose: - sys.stderr.write("delete %s\n" % relPath) + sys.stdout.write("delete %s\n" % relPath) + sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) + if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): + self.largeFileSystem.removeLargeFile(relPath) + # handle another chunk of streaming data def streamP4FilesCb(self, marshalled): @@ -2199,6 +2480,14 @@ class P4Sync(Command, P4UserMap): if marshalled["code"] == "error": if "data" in marshalled: err = marshalled["data"].rstrip() + + if not err and 'fileSize' in self.stream_file: + required_bytes = int((4 * int(self.stream_file["fileSize"])) - calcDiskFree()) + if required_bytes > 0: + err = 'Not enough space left on %s! Free at least %i MB.' % ( + os.getcwd(), required_bytes/1024/1024 + ) + if err: f = None if self.stream_have_file_info: @@ -2227,10 +2516,23 @@ class P4Sync(Command, P4UserMap): # 'data' field we need to append to our array for k in marshalled.keys(): if k == 'data': + if 'streamContentSize' not in self.stream_file: + self.stream_file['streamContentSize'] = 0 + self.stream_file['streamContentSize'] += len(marshalled['data']) self.stream_contents.append(marshalled['data']) else: self.stream_file[k] = marshalled[k] + if (verbose and + 'streamContentSize' in self.stream_file and + 'fileSize' in self.stream_file and + 'depotFile' in self.stream_file): + size = int(self.stream_file["fileSize"]) + if size > 0: + progress = 100*self.stream_file['streamContentSize']/size + sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024))) + sys.stdout.flush() + self.stream_have_file_info = True # Stream directly from "p4 files" into "git fast-import" @@ -2281,8 +2583,11 @@ class P4Sync(Command, P4UserMap): else: return "%s <a@b>" % userid - # Stream a p4 tag def streamTag(self, gitStream, labelName, labelDetails, commit, epoch): + """ Stream a p4 tag. + commit is either a git commit, or a fast-import mark, ":<p4commit>" + """ + if verbose: print "writing tag %s for commit %s" % (labelName, commit) gitStream.write("tag %s\n" % labelName) @@ -2333,7 +2638,7 @@ class P4Sync(Command, P4UserMap): self.clientSpecDirs.update_client_spec_path_cache(files) self.gitStream.write("commit %s\n" % branch) -# gitStream.write("mark :%s\n" % details["change"]) + self.gitStream.write("mark :%s\n" % details["change"]) self.committedChanges.add(int(details["change"])) committer = "" if author not in self.users: @@ -2452,13 +2757,19 @@ class P4Sync(Command, P4UserMap): if change.has_key('change'): # find the corresponding git commit; take the oldest commit changelist = int(change['change']) - gitCommit = read_pipe(["git", "rev-list", "--max-count=1", - "--reverse", ":/\[git-p4:.*change = %d\]" % changelist]) - if len(gitCommit) == 0: - print "could not find git commit for changelist %d" % changelist - else: - gitCommit = gitCommit.strip() + if changelist in self.committedChanges: + gitCommit = ":%d" % changelist # use a fast-import mark commitFound = True + else: + gitCommit = read_pipe(["git", "rev-list", "--max-count=1", + "--reverse", ":/\[git-p4:.*change = %d\]" % changelist], ignore_error=True) + if len(gitCommit) == 0: + print "importing label %s: could not find git commit for changelist %d" % (name, changelist) + else: + commitFound = True + gitCommit = gitCommit.strip() + + if commitFound: # Convert from p4 time format try: tmwhen = time.strptime(labelDetails['Update'], "%Y/%m/%d %H:%M:%S") |