summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sha1_file.c84
-rwxr-xr-xt/t1050-large.sh27
2 files changed, 110 insertions, 1 deletions
diff --git a/sha1_file.c b/sha1_file.c
index 49416b0291..f0ca6a1749 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -11,6 +11,7 @@
#include "pack.h"
#include "blob.h"
#include "commit.h"
+#include "run-command.h"
#include "tag.h"
#include "tree.h"
#include "tree-walk.h"
@@ -2658,6 +2659,85 @@ static int index_core(unsigned char *sha1, int fd, size_t size,
return ret;
}
+/*
+ * Hash a large blob by streaming it from "fd" into a "git fast-import"
+ * child process instead of holding it in core.
+ *
+ * This creates one packfile per large blob, because the caller
+ * immediately wants the result sha1, and fast-import can report the
+ * object name via marks mechanism only by closing the created
+ * packfile.
+ *
+ * This also bypasses the usual "convert-to-git" dance, and that is on
+ * purpose. We could write a streaming version of the converting
+ * functions and insert that before feeding the data to fast-import
+ * (or an equivalent in-core API), but the primary motivation for
+ * trying to stream from the working tree file and to avoid mmaping
+ * it in core is to deal with large binary blobs, and by definition
+ * they do _not_ want to get any conversion.
+ *
+ * sha1:  receives the object name parsed from the exported mark.
+ * fd:    descriptor the blob contents are read from; it is not
+ *        closed here.
+ * size:  number of bytes to read from fd; this is also the length
+ *        announced to fast-import in the "data" command.
+ * type, path, flags: not used by this function; they keep the
+ *        signature parallel to index_core().
+ *
+ * Returns 0 on success.  Every failure is fatal (die/die_errno).
+ */
+static int index_stream(unsigned char *sha1, int fd, size_t size,
+			enum object_type type, const char *path,
+			unsigned flags)
+{
+	/* ":1 " + 40 hex digits + "\n" */
+	enum { MARK_LINE_LEN = 3 + 40 + 1 };
+	struct child_process fast_import;
+	char tmpfile[PATH_MAX];
+	char export_marks[PATH_MAX + 32];
+	const char *argv[] = { "fast-import", "--quiet", export_marks, NULL };
+	char fast_import_cmd[64];
+	char mark[512];
+	int len, tmpfd;
+
+	/*
+	 * Reserve a temporary file name for fast-import to export its
+	 * marks into.  The formatted lengths are checked so that a long
+	 * repository path cannot overrun the buffers.
+	 */
+	len = snprintf(tmpfile, sizeof(tmpfile), "%s",
+		       git_path("hashstream_XXXXXX"));
+	if (len < 0 || sizeof(tmpfile) <= (size_t)len)
+		die("index-stream: tempfile path too long");
+	tmpfd = git_mkstemp_mode(tmpfile, 0600);
+	if (tmpfd < 0)
+		die_errno("cannot create tempfile: %s", tmpfile);
+	if (close(tmpfd))
+		die_errno("cannot close tempfile: %s", tmpfile);
+	len = snprintf(export_marks, sizeof(export_marks),
+		       "--export-marks=%s", tmpfile);
+	if (len < 0 || sizeof(export_marks) <= (size_t)len)
+		die("index-stream: tempfile path too long");
+
+	memset(&fast_import, 0, sizeof(fast_import));
+	fast_import.in = -1;
+	fast_import.argv = argv;
+	fast_import.git_cmd = 1;
+	if (start_command(&fast_import))
+		die_errno("index-stream: git fast-import failed");
+
+	/* Announce a single blob, tagged as mark :1, of exactly "size" bytes. */
+	len = snprintf(fast_import_cmd, sizeof(fast_import_cmd),
+		       "blob\nmark :1\ndata %lu\n", (unsigned long) size);
+	if (len < 0 || sizeof(fast_import_cmd) <= (size_t)len)
+		die("index-stream: fast-import command too long");
+	if (write_in_full(fast_import.in, fast_import_cmd, len) != len)
+		die_errno("index-stream: feeding fast-import");
+
+	while (size) {
+		char chunk[10240];
+		size_t want = size < sizeof(chunk) ? size : sizeof(chunk);
+		/* signed, so that a read error (-1) is detectable */
+		ssize_t got = read_in_full(fd, chunk, want);
+
+		if (got < 0)
+			die_errno("index-stream: reading input");
+		/*
+		 * The input ended before "size" bytes were seen; without
+		 * this check the loop would never make progress.
+		 */
+		if (!got)
+			die("index-stream: unexpected end of input");
+		if (write_in_full(fast_import.in, chunk, got) != got)
+			die_errno("index-stream: feeding fast-import");
+		size -= got;
+	}
+	if (close(fast_import.in))
+		die_errno("index-stream: closing fast-import");
+	if (finish_command(&fast_import))
+		die_errno("index-stream: finishing fast-import");
+
+	/* Read the exported mark back; one byte is kept for the NUL. */
+	tmpfd = open(tmpfile, O_RDONLY);
+	if (tmpfd < 0)
+		die_errno("index-stream: cannot open fast-import mark");
+	len = read_in_full(tmpfd, mark, sizeof(mark) - 1);
+	if (len < 0)
+		die_errno("index-stream: reading fast-import mark");
+	mark[len] = '\0';
+	if (close(tmpfd) < 0)
+		die_errno("index-stream: closing fast-import mark");
+	if (unlink(tmpfile))
+		die_errno("index-stream: unlinking fast-import mark");
+	/* errno carries no information here, hence die() and not die_errno() */
+	if (len != MARK_LINE_LEN ||
+	    memcmp(":1 ", mark, 3) ||
+	    get_sha1_hex(mark + 3, sha1))
+		die("index-stream: unexpected fast-import mark: <%s>", mark);
+	return 0;
+}
+
+
int index_fd(unsigned char *sha1, int fd, struct stat *st,
enum object_type type, const char *path, unsigned flags)
{
@@ -2666,8 +2746,10 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
- else
+ else if (size <= big_file_threshold || type != OBJ_BLOB)
ret = index_core(sha1, fd, size, type, path, flags);
+ else
+ ret = index_stream(sha1, fd, size, type, path, flags);
close(fd);
return ret;
}
diff --git a/t/t1050-large.sh b/t/t1050-large.sh
new file mode 100755
index 0000000000..deba111bd7
--- /dev/null
+++ b/t/t1050-large.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+# Copyright (c) 2011, Google Inc.
+
+test_description='adding and checking out large blobs'
+
+. ./test-lib.sh
+
+# Lower core.bigfilethreshold to 200k, then create the file "large" by
+# having dd write the line "X" at an offset of 2000 blocks of 1k, which
+# puts the file size well above that threshold.
+test_expect_success setup '
+ git config core.bigfilethreshold 200k &&
+ echo X | dd of=large bs=1k seek=2000
+'
+
+# Add the file and check what ended up under .git/objects: a pack file
+# must exist and no loose object file may exist.
+test_expect_success 'add a large file' '
+ git add large &&
+ # make sure we got a packfile and no loose objects
+ test -f .git/objects/pack/pack-*.pack &&
+ test ! -f .git/objects/??/??????????????????????????????????????
+'
+
+# Register the blob recorded for "large" in the index under a second
+# path, "another", check that path out, and compare the two working
+# tree files byte for byte with cmp.
+test_expect_success 'checkout a large file' '
+ large=$(git rev-parse :large) &&
+ git update-index --add --cacheinfo 100644 $large another &&
+ git checkout another &&
+ cmp large another ;# this must not be test_cmp
+'
+
+test_done