summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--convert.c55
-rw-r--r--convert.h5
-rw-r--r--sha1_file.c27
-rwxr-xr-xt/t0021-conversion.sh24
4 files changed, 99 insertions, 12 deletions
diff --git a/convert.c b/convert.c
index cb5fbb45ea..677d339a8b 100644
--- a/convert.c
+++ b/convert.c
@@ -312,11 +312,12 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
struct filter_params {
const char *src;
unsigned long size;
+ int fd;
const char *cmd;
const char *path;
};
-static int filter_buffer(int in, int out, void *data)
+static int filter_buffer_or_fd(int in, int out, void *data)
{
/*
* Spawn cmd and feed the buffer contents through its stdin.
@@ -355,7 +356,12 @@ static int filter_buffer(int in, int out, void *data)
sigchain_push(SIGPIPE, SIG_IGN);
- write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ if (params->src) {
+ write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ } else {
+ write_err = copy_fd(params->fd, child_process.in);
+ }
+
if (close(child_process.in))
write_err = 1;
if (write_err)
@@ -371,7 +377,7 @@ static int filter_buffer(int in, int out, void *data)
return (write_err || status);
}
-static int apply_filter(const char *path, const char *src, size_t len,
+static int apply_filter(const char *path, const char *src, size_t len, int fd,
struct strbuf *dst, const char *cmd)
{
/*
@@ -392,11 +398,12 @@ static int apply_filter(const char *path, const char *src, size_t len,
return 1;
memset(&async, 0, sizeof(async));
- async.proc = filter_buffer;
+ async.proc = filter_buffer_or_fd;
async.data = &params;
async.out = -1;
params.src = src;
params.size = len;
+ params.fd = fd;
params.cmd = cmd;
params.path = path;
@@ -747,6 +754,25 @@ static void convert_attrs(struct conv_attrs *ca, const char *path)
}
}
+int would_convert_to_git_filter_fd(const char *path)
+{
+ struct conv_attrs ca;
+
+ convert_attrs(&ca, path);
+ if (!ca.drv)
+ return 0;
+
+ /*
+ * Apply a filter to an fd only if the filter is required to succeed.
+ * We must die if the filter fails, because the original data before
+ * filtering is not available.
+ */
+ if (!ca.drv->required)
+ return 0;
+
+ return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
+}
+
int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe)
{
@@ -761,7 +787,7 @@ int convert_to_git(const char *path, const char *src, size_t len,
required = ca.drv->required;
}
- ret |= apply_filter(path, src, len, dst, filter);
+ ret |= apply_filter(path, src, len, -1, dst, filter);
if (!ret && required)
die("%s: clean filter '%s' failed", path, ca.drv->name);
@@ -778,6 +804,23 @@ int convert_to_git(const char *path, const char *src, size_t len,
return ret | ident_to_git(path, src, len, dst, ca.ident);
}
+void convert_to_git_filter_fd(const char *path, int fd, struct strbuf *dst,
+ enum safe_crlf checksafe)
+{
+ struct conv_attrs ca;
+ convert_attrs(&ca, path);
+
+ assert(ca.drv);
+ assert(ca.drv->clean);
+
+ if (!apply_filter(path, NULL, 0, fd, dst, ca.drv->clean))
+ die("%s: clean filter '%s' failed", path, ca.drv->name);
+
+ ca.crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+ crlf_to_git(path, dst->buf, dst->len, dst, ca.crlf_action, checksafe);
+ ident_to_git(path, dst->buf, dst->len, dst, ca.ident);
+}
+
static int convert_to_working_tree_internal(const char *path, const char *src,
size_t len, struct strbuf *dst,
int normalizing)
@@ -811,7 +854,7 @@ static int convert_to_working_tree_internal(const char *path, const char *src,
}
}
- ret_filter = apply_filter(path, src, len, dst, filter);
+ ret_filter = apply_filter(path, src, len, -1, dst, filter);
if (!ret_filter && required)
die("%s: smudge filter %s failed", path, ca.drv->name);
diff --git a/convert.h b/convert.h
index c638b33632..d9d853cd3d 100644
--- a/convert.h
+++ b/convert.h
@@ -44,6 +44,11 @@ static inline int would_convert_to_git(const char *path)
{
return convert_to_git(path, NULL, 0, NULL, 0);
}
+/* Precondition: would_convert_to_git_filter_fd(path) == true */
+extern void convert_to_git_filter_fd(const char *path, int fd,
+ struct strbuf *dst,
+ enum safe_crlf checksafe);
+extern int would_convert_to_git_filter_fd(const char *path);
/*****************************************************************
*
diff --git a/sha1_file.c b/sha1_file.c
index d9b51578cc..423ec64e87 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3090,6 +3090,29 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
return ret;
}
+static int index_stream_convert_blob(unsigned char *sha1, int fd,
+ const char *path, unsigned flags)
+{
+ int ret;
+ const int write_object = flags & HASH_WRITE_OBJECT;
+ struct strbuf sbuf = STRBUF_INIT;
+
+ assert(path);
+ assert(would_convert_to_git_filter_fd(path));
+
+ convert_to_git_filter_fd(path, fd, &sbuf,
+ write_object ? safe_crlf : SAFE_CRLF_FALSE);
+
+ if (write_object)
+ ret = write_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+ sha1);
+ else
+ ret = hash_sha1_file(sbuf.buf, sbuf.len, typename(OBJ_BLOB),
+ sha1);
+ strbuf_release(&sbuf);
+ return ret;
+}
+
static int index_pipe(unsigned char *sha1, int fd, enum object_type type,
const char *path, unsigned flags)
{
@@ -3157,7 +3180,9 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st,
int ret;
size_t size = xsize_t(st->st_size);
- if (!S_ISREG(st->st_mode))
+ if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(path))
+ ret = index_stream_convert_blob(sha1, fd, path, flags);
+ else if (!S_ISREG(st->st_mode))
ret = index_pipe(sha1, fd, type, path, flags);
else if (size <= big_file_threshold || type != OBJ_BLOB ||
(path && would_convert_to_git(path)))
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index f890c54d13..ca7d2a630a 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -153,17 +153,23 @@ test_expect_success 'filter shell-escaped filenames' '
:
'
-test_expect_success 'required filter success' '
- git config filter.required.smudge cat &&
- git config filter.required.clean cat &&
+test_expect_success 'required filter should filter data' '
+ git config filter.required.smudge ./rot13.sh &&
+ git config filter.required.clean ./rot13.sh &&
git config filter.required.required true &&
echo "*.r filter=required" >.gitattributes &&
- echo test >test.r &&
+ cat test.o >test.r &&
git add test.r &&
+
rm -f test.r &&
- git checkout -- test.r
+ git checkout -- test.r &&
+ cmp test.o test.r &&
+
+ ./rot13.sh <test.o >expected &&
+ git cat-file blob :test.r >actual &&
+ cmp expected actual
'
test_expect_success 'required filter smudge failure' '
@@ -190,6 +196,14 @@ test_expect_success 'required filter clean failure' '
test_must_fail git add test.fc
'
+test_expect_success 'filtering large input to small output should use little memory' '
+ git config filter.devnull.clean "cat >/dev/null" &&
+ git config filter.devnull.required true &&
+ for i in $(test_seq 1 30); do printf "%1048576d" 1; done >30MB &&
+ echo "30MB filter=devnull" >.gitattributes &&
+ GIT_MMAP_LIMIT=1m GIT_ALLOC_LIMIT=1m git add 30MB
+'
+
test_expect_success EXPENSIVE 'filter large file' '
git config filter.largefile.smudge cat &&
git config filter.largefile.clean cat &&