From d5fdae67379778502ca8b3b3186ce4692d912e30 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:07 +0200 Subject: get_ref_dir(): return early if directory cannot be read Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 85 ++++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/refs.c b/refs.c index 09322fede0..d539241d98 100644 --- a/refs.c +++ b/refs.c @@ -754,6 +754,9 @@ static void get_ref_dir(struct ref_cache *refs, const char *base, { DIR *d; const char *path; + struct dirent *de; + int baselen; + char *refname; if (*refs->name) path = git_path_submodule(refs->name, "%s", base); @@ -761,55 +764,55 @@ static void get_ref_dir(struct ref_cache *refs, const char *base, path = git_path("%s", base); d = opendir(path); - if (d) { - struct dirent *de; - int baselen = strlen(base); - char *refname = xmalloc(baselen + 257); + if (!d) + return; - memcpy(refname, base, baselen); - if (baselen && base[baselen-1] != '/') - refname[baselen++] = '/'; + baselen = strlen(base); + refname = xmalloc(baselen + 257); - while ((de = readdir(d)) != NULL) { - unsigned char sha1[20]; - struct stat st; - int flag; - int namelen; - const char *refdir; + memcpy(refname, base, baselen); + if (baselen && base[baselen-1] != '/') + refname[baselen++] = '/'; - if (de->d_name[0] == '.') - continue; - namelen = strlen(de->d_name); - if (namelen > 255) - continue; - if (has_extension(de->d_name, ".lock")) - continue; - memcpy(refname + baselen, de->d_name, namelen+1); - refdir = *refs->name - ? 
git_path_submodule(refs->name, "%s", refname) - : git_path("%s", refname); - if (stat(refdir, &st) < 0) - continue; - if (S_ISDIR(st.st_mode)) { - get_ref_dir(refs, refname, dir); - continue; - } - if (*refs->name) { - hashclr(sha1); - flag = 0; - if (resolve_gitlink_ref(refs->name, refname, sha1) < 0) { - hashclr(sha1); - flag |= REF_ISBROKEN; - } - } else if (read_ref_full(refname, sha1, 1, &flag)) { + while ((de = readdir(d)) != NULL) { + unsigned char sha1[20]; + struct stat st; + int flag; + int namelen; + const char *refdir; + + if (de->d_name[0] == '.') + continue; + namelen = strlen(de->d_name); + if (namelen > 255) + continue; + if (has_extension(de->d_name, ".lock")) + continue; + memcpy(refname + baselen, de->d_name, namelen+1); + refdir = *refs->name + ? git_path_submodule(refs->name, "%s", refname) + : git_path("%s", refname); + if (stat(refdir, &st) < 0) + continue; + if (S_ISDIR(st.st_mode)) { + get_ref_dir(refs, refname, dir); + continue; + } + if (*refs->name) { + hashclr(sha1); + flag = 0; + if (resolve_gitlink_ref(refs->name, refname, sha1) < 0) { hashclr(sha1); flag |= REF_ISBROKEN; } - add_ref(dir, create_ref_entry(refname, sha1, flag, 1)); + } else if (read_ref_full(refname, sha1, 1, &flag)) { + hashclr(sha1); + flag |= REF_ISBROKEN; } - free(refname); - closedir(d); + add_ref(dir, create_ref_entry(refname, sha1, flag, 1)); } + free(refname); + closedir(d); } static struct ref_dir *get_loose_refs(struct ref_cache *refs) -- cgit v1.2.3 From 6163cd8a2a8703bdbd30920731aec32c68c39453 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:00 +0700 Subject: streaming: void pointer instead of char pointer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow any kind of buffer to be fed to read_istream() without an explicit cast by making its buf argument a void pointer. It's about arbitrary data, not only characters.
Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- streaming.c | 2 +- streaming.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/streaming.c b/streaming.c index 7e7ee2be6f..3a3cd1206a 100644 --- a/streaming.c +++ b/streaming.c @@ -99,7 +99,7 @@ int close_istream(struct git_istream *st) return r; } -ssize_t read_istream(struct git_istream *st, char *buf, size_t sz) +ssize_t read_istream(struct git_istream *st, void *buf, size_t sz) { return st->vtbl->read(st, buf, sz); } diff --git a/streaming.h b/streaming.h index 3e827709c8..1d05c2a465 100644 --- a/streaming.h +++ b/streaming.h @@ -10,7 +10,7 @@ struct git_istream; extern struct git_istream *open_istream(const unsigned char *, enum object_type *, unsigned long *, struct stream_filter *); extern int close_istream(struct git_istream *); -extern ssize_t read_istream(struct git_istream *, char *, size_t); +extern ssize_t read_istream(struct git_istream *, void *, size_t); extern int stream_blob_to_fd(int fd, const unsigned char *, struct stream_filter *, int can_seek); -- cgit v1.2.3 From d240d4102103215e9fe38431b1702a4024d2f1a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Thu, 3 May 2012 08:51:01 +0700 Subject: archive-tar: turn write_tar_entry into blob-writing only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before this patch write_tar_entry() can: - write global header by write_global_extended_header() calling write_tar_entry with both sha1 and path == NULL - write extended header for symlinks, by write_tar_entry() calling itself with sha1 != NULL and path == NULL - write a normal blob. In this case both sha1 and path are valid. After this patch, the first two call sites are modified to write the header without calling write_tar_entry(). The function is now for writing blobs only.
This simplifies handling when write_tar_entry() learns about large blobs. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-tar.c | 78 +++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 52 insertions(+), 26 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 20af0051a3..1727ab90ae 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -123,6 +123,43 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen) return i; } +static void prepare_header(struct archiver_args *args, + struct ustar_header *header, + unsigned int mode, unsigned long size) +{ + sprintf(header->mode, "%07o", mode & 07777); + sprintf(header->size, "%011lo", S_ISREG(mode) ? size : 0); + sprintf(header->mtime, "%011lo", (unsigned long) args->time); + + sprintf(header->uid, "%07o", 0); + sprintf(header->gid, "%07o", 0); + strlcpy(header->uname, "root", sizeof(header->uname)); + strlcpy(header->gname, "root", sizeof(header->gname)); + sprintf(header->devmajor, "%07o", 0); + sprintf(header->devminor, "%07o", 0); + + memcpy(header->magic, "ustar", 6); + memcpy(header->version, "00", 2); + + sprintf(header->chksum, "%07o", ustar_header_chksum(header)); +} + +static int write_extended_header(struct archiver_args *args, + const unsigned char *sha1, + const void *buffer, unsigned long size) +{ + struct ustar_header header; + unsigned int mode; + memset(&header, 0, sizeof(header)); + *header.typeflag = TYPEFLAG_EXT_HEADER; + mode = 0100666; + sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); + prepare_header(args, &header, mode, size); + write_blocked(&header, sizeof(header)); + write_blocked(buffer, size); + return 0; +} + static int write_tar_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, void *buffer, unsigned long size) @@ -134,13 +171,9 @@ static int write_tar_entry(struct archiver_args *args, memset(&header, 0, sizeof(header)); if (!sha1) { - 
*header.typeflag = TYPEFLAG_GLOBAL_HEADER; - mode = 0100666; - strcpy(header.name, "pax_global_header"); + die("BUG: sha1 == NULL is not supported"); } else if (!path) { - *header.typeflag = TYPEFLAG_EXT_HEADER; - mode = 0100666; - sprintf(header.name, "%s.paxheader", sha1_to_hex(sha1)); + die("BUG: path == NULL is not supported"); } else { if (S_ISDIR(mode) || S_ISGITLINK(mode)) { *header.typeflag = TYPEFLAG_DIR; @@ -182,25 +215,11 @@ static int write_tar_entry(struct archiver_args *args, memcpy(header.linkname, buffer, size); } - sprintf(header.mode, "%07o", mode & 07777); - sprintf(header.size, "%011lo", S_ISREG(mode) ? size : 0); - sprintf(header.mtime, "%011lo", (unsigned long) args->time); - - sprintf(header.uid, "%07o", 0); - sprintf(header.gid, "%07o", 0); - strlcpy(header.uname, "root", sizeof(header.uname)); - strlcpy(header.gname, "root", sizeof(header.gname)); - sprintf(header.devmajor, "%07o", 0); - sprintf(header.devminor, "%07o", 0); - - memcpy(header.magic, "ustar", 6); - memcpy(header.version, "00", 2); - - sprintf(header.chksum, "%07o", ustar_header_chksum(&header)); + prepare_header(args, &header, mode, size); if (ext_header.len > 0) { - err = write_tar_entry(args, sha1, NULL, 0, 0, ext_header.buf, - ext_header.len); + err = write_extended_header(args, sha1, ext_header.buf, + ext_header.len); if (err) return err; } @@ -215,11 +234,18 @@ static int write_global_extended_header(struct archiver_args *args) { const unsigned char *sha1 = args->commit_sha1; struct strbuf ext_header = STRBUF_INIT; - int err; + struct ustar_header header; + unsigned int mode; + int err = 0; strbuf_append_ext_header(&ext_header, "comment", sha1_to_hex(sha1), 40); - err = write_tar_entry(args, NULL, NULL, 0, 0, ext_header.buf, - ext_header.len); + memset(&header, 0, sizeof(header)); + *header.typeflag = TYPEFLAG_GLOBAL_HEADER; + mode = 0100666; + strcpy(header.name, "pax_global_header"); + prepare_header(args, &header, mode, ext_header.len); + write_blocked(&header, 
sizeof(header)); + write_blocked(ext_header.buf, ext_header.len); strbuf_release(&ext_header); return err; } -- cgit v1.2.3 From 853907097af803c595f0c12fc355c907702eb7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Thu, 3 May 2012 08:51:02 +0700 Subject: archive-tar: unindent write_tar_entry by one level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It used to be if (!sha1) { ... } else if (!path) { ... } else { ... } Now that the first two blocks are no-ops, we can remove the if/else skeleton and put the else block back by one indent level. Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-tar.c | 56 +++++++++++++++++++++++++------------------------------- 1 file changed, 25 insertions(+), 31 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 1727ab90ae..6c8a0bd3bf 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -170,40 +170,34 @@ static int write_tar_entry(struct archiver_args *args, memset(&header, 0, sizeof(header)); - if (!sha1) { - die("BUG: sha1 == NULL is not supported"); - } else if (!path) { - die("BUG: path == NULL is not supported"); + if (S_ISDIR(mode) || S_ISGITLINK(mode)) { + *header.typeflag = TYPEFLAG_DIR; + mode = (mode | 0777) & ~tar_umask; + } else if (S_ISLNK(mode)) { + *header.typeflag = TYPEFLAG_LNK; + mode |= 0777; + } else if (S_ISREG(mode)) { + *header.typeflag = TYPEFLAG_REG; + mode = (mode | ((mode & 0100) ? 0777 : 0666)) & ~tar_umask; } else { - if (S_ISDIR(mode) || S_ISGITLINK(mode)) { - *header.typeflag = TYPEFLAG_DIR; - mode = (mode | 0777) & ~tar_umask; - } else if (S_ISLNK(mode)) { - *header.typeflag = TYPEFLAG_LNK; - mode |= 0777; - } else if (S_ISREG(mode)) { - *header.typeflag = TYPEFLAG_REG; - mode = (mode | ((mode & 0100) ?
0777 : 0666)) & ~tar_umask; + return error("unsupported file mode: 0%o (SHA1: %s)", + mode, sha1_to_hex(sha1)); + } + if (pathlen > sizeof(header.name)) { + size_t plen = get_path_prefix(path, pathlen, + sizeof(header.prefix)); + size_t rest = pathlen - plen - 1; + if (plen > 0 && rest <= sizeof(header.name)) { + memcpy(header.prefix, path, plen); + memcpy(header.name, path + plen + 1, rest); } else { - return error("unsupported file mode: 0%o (SHA1: %s)", - mode, sha1_to_hex(sha1)); + sprintf(header.name, "%s.data", + sha1_to_hex(sha1)); + strbuf_append_ext_header(&ext_header, "path", + path, pathlen); } - if (pathlen > sizeof(header.name)) { - size_t plen = get_path_prefix(path, pathlen, - sizeof(header.prefix)); - size_t rest = pathlen - plen - 1; - if (plen > 0 && rest <= sizeof(header.name)) { - memcpy(header.prefix, path, plen); - memcpy(header.name, path + plen + 1, rest); - } else { - sprintf(header.name, "%s.data", - sha1_to_hex(sha1)); - strbuf_append_ext_header(&ext_header, "path", - path, pathlen); - } - } else - memcpy(header.name, path, pathlen); - } + } else + memcpy(header.name, path, pathlen); if (S_ISLNK(mode) && buffer) { if (size > sizeof(header.linkname)) { -- cgit v1.2.3 From 9cb513b7988c2fe443c47186e42dd827b76ddb14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Thu, 3 May 2012 08:51:03 +0700 Subject: archive: delegate blob reading to backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit archive-tar.c and archive-zip.c now perform conversion check, with help of sha1_file_to_archive() from archive.c This gives backends more freedom in dealing with (streaming) large blobs. 
Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-tar.c | 25 +++++++++++++++++++++---- archive-zip.c | 15 +++++++++++++-- archive.c | 28 +++++++++++----------------- archive.h | 10 +++++++++- 4 files changed, 54 insertions(+), 24 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 6c8a0bd3bf..3be0cdf350 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -161,11 +161,15 @@ static int write_extended_header(struct archiver_args *args, } static int write_tar_entry(struct archiver_args *args, - const unsigned char *sha1, const char *path, size_t pathlen, - unsigned int mode, void *buffer, unsigned long size) + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode) { struct ustar_header header; struct strbuf ext_header = STRBUF_INIT; + unsigned int old_mode = mode; + unsigned long size; + void *buffer; int err = 0; memset(&header, 0, sizeof(header)); @@ -199,7 +203,17 @@ static int write_tar_entry(struct archiver_args *args, } else memcpy(header.name, path, pathlen); - if (S_ISLNK(mode) && buffer) { + if (S_ISLNK(mode) || S_ISREG(mode)) { + enum object_type type; + buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size); + if (!buffer) + return error("cannot read %s", sha1_to_hex(sha1)); + } else { + buffer = NULL; + size = 0; + } + + if (S_ISLNK(mode)) { if (size > sizeof(header.linkname)) { sprintf(header.linkname, "see %s.paxheader", sha1_to_hex(sha1)); @@ -214,13 +228,16 @@ static int write_tar_entry(struct archiver_args *args, if (ext_header.len > 0) { err = write_extended_header(args, sha1, ext_header.buf, ext_header.len); - if (err) + if (err) { + free(buffer); return err; + } } strbuf_release(&ext_header); write_blocked(&header, sizeof(header)); if (S_ISREG(mode) && buffer && size > 0) write_blocked(buffer, size); + free(buffer); return err; } diff --git a/archive-zip.c b/archive-zip.c index 02d1f3787a..716cc42710 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -121,8 +121,9 @@ 
static void *zlib_deflate(void *data, unsigned long size, } static int write_zip_entry(struct archiver_args *args, - const unsigned char *sha1, const char *path, size_t pathlen, - unsigned int mode, void *buffer, unsigned long size) + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode) { struct zip_local_header header; struct zip_dir_header dirent; @@ -134,6 +135,8 @@ static int write_zip_entry(struct archiver_args *args, int method; unsigned char *out; void *deflated = NULL; + void *buffer; + unsigned long size; crc = crc32(0, NULL, 0); @@ -148,7 +151,14 @@ static int write_zip_entry(struct archiver_args *args, out = NULL; uncompressed_size = 0; compressed_size = 0; + buffer = NULL; + size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { + enum object_type type; + buffer = sha1_file_to_archive(args, path, sha1, mode, &type, &size); + if (!buffer) + return error("cannot read %s", sha1_to_hex(sha1)); + method = 0; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? ((mode) << 16) : 0; @@ -229,6 +239,7 @@ static int write_zip_entry(struct archiver_args *args, } free(deflated); + free(buffer); return 0; } diff --git a/archive.c b/archive.c index 1ee837d717..cd083eaf9a 100644 --- a/archive.c +++ b/archive.c @@ -59,12 +59,15 @@ static void format_subst(const struct commit *commit, free(to_free); } -static void *sha1_file_to_archive(const char *path, const unsigned char *sha1, - unsigned int mode, enum object_type *type, - unsigned long *sizep, const struct commit *commit) +void *sha1_file_to_archive(const struct archiver_args *args, + const char *path, const unsigned char *sha1, + unsigned int mode, enum object_type *type, + unsigned long *sizep) { void *buffer; + const struct commit *commit = args->convert ? 
args->commit : NULL; + path += args->baselen; buffer = read_sha1_file(sha1, type, sizep); if (buffer && S_ISREG(mode)) { struct strbuf buf = STRBUF_INIT; @@ -109,12 +112,9 @@ static int write_archive_entry(const unsigned char *sha1, const char *base, write_archive_entry_fn_t write_entry = c->write_entry; struct git_attr_check check[2]; const char *path_without_prefix; - int convert = 0; int err; - enum object_type type; - unsigned long size; - void *buffer; + args->convert = 0; strbuf_reset(&path); strbuf_grow(&path, PATH_MAX); strbuf_add(&path, args->base, args->baselen); @@ -126,28 +126,22 @@ static int write_archive_entry(const unsigned char *sha1, const char *base, if (!git_check_attr(path_without_prefix, ARRAY_SIZE(check), check)) { if (ATTR_TRUE(check[0].value)) return 0; - convert = ATTR_TRUE(check[1].value); + args->convert = ATTR_TRUE(check[1].value); } if (S_ISDIR(mode) || S_ISGITLINK(mode)) { strbuf_addch(&path, '/'); if (args->verbose) fprintf(stderr, "%.*s\n", (int)path.len, path.buf); - err = write_entry(args, sha1, path.buf, path.len, mode, NULL, 0); + err = write_entry(args, sha1, path.buf, path.len, mode); if (err) return err; return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0); } - buffer = sha1_file_to_archive(path_without_prefix, sha1, mode, - &type, &size, convert ? 
args->commit : NULL); - if (!buffer) - return error("cannot read %s", sha1_to_hex(sha1)); if (args->verbose) fprintf(stderr, "%.*s\n", (int)path.len, path.buf); - err = write_entry(args, sha1, path.buf, path.len, mode, buffer, size); - free(buffer); - return err; + return write_entry(args, sha1, path.buf, path.len, mode); } int write_archive_entries(struct archiver_args *args, @@ -167,7 +161,7 @@ int write_archive_entries(struct archiver_args *args, if (args->verbose) fprintf(stderr, "%.*s\n", (int)len, args->base); err = write_entry(args, args->tree->object.sha1, args->base, - len, 040777, NULL, 0); + len, 040777); if (err) return err; } diff --git a/archive.h b/archive.h index 2b0884f1ef..895afcdc7a 100644 --- a/archive.h +++ b/archive.h @@ -11,6 +11,7 @@ struct archiver_args { const char **pathspec; unsigned int verbose : 1; unsigned int worktree_attributes : 1; + unsigned int convert : 1; int compression_level; }; @@ -27,11 +28,18 @@ extern void register_archiver(struct archiver *); extern void init_tar_archiver(void); extern void init_zip_archiver(void); -typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, unsigned int mode, void *buffer, unsigned long size); +typedef int (*write_archive_entry_fn_t)(struct archiver_args *args, + const unsigned char *sha1, + const char *path, size_t pathlen, + unsigned int mode); extern int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry); extern int write_archive(int argc, const char **argv, const char *prefix, int setup_prefix, const char *name_hint, int remote); const char *archive_format_from_filename(const char *filename); +extern void *sha1_file_to_archive(const struct archiver_args *args, + const char *path, const unsigned char *sha1, + unsigned int mode, enum object_type *type, + unsigned long *sizep); #endif /* ARCHIVE_H */ -- cgit v1.2.3 From 5544049def9a80bc5ea09a5649e13c1b56160518 Mon Sep 17 00:00:00 
2001 From: =?UTF-8?q?Nguy=E1=BB=85n=20Th=C3=A1i=20Ng=E1=BB=8Dc=20Duy?= Date: Thu, 3 May 2012 08:51:04 +0700 Subject: archive-tar: stream large blobs to tar file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit t5000 verifies output while t1050 makes sure the command always respects core.bigfilethreshold Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-tar.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++----- t/t1050-large.sh | 4 ++++ t/t5000-tar-tree.sh | 6 ++++++ 3 files changed, 61 insertions(+), 5 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 3be0cdf350..93387ea336 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -4,6 +4,7 @@ #include "cache.h" #include "tar.h" #include "archive.h" +#include "streaming.h" #include "run-command.h" #define RECORDSIZE (512) @@ -30,10 +31,9 @@ static void write_if_needed(void) * queues up writes, so that all our write(2) calls write exactly one * full block; pads writes to RECORDSIZE */ -static void write_blocked(const void *data, unsigned long size) +static void do_write_blocked(const void *data, unsigned long size) { const char *buf = data; - unsigned long tail; if (offset) { unsigned long chunk = BLOCKSIZE - offset; @@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size) memcpy(block + offset, buf, size); offset += size; } +} + +static void finish_record(void) +{ + unsigned long tail; tail = offset % RECORDSIZE; if (tail) { memset(block + offset, 0, RECORDSIZE - tail); @@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size) write_if_needed(); } +static void write_blocked(const void *data, unsigned long size) +{ + do_write_blocked(data, size); + finish_record(); +} + /* * The end of tar archives is marked by 2*512 nul bytes and after that * follows the rest of the block (if any). 
@@ -77,6 +88,33 @@ static void write_trailer(void) } } +/* + * queues up writes, so that all our write(2) calls write exactly one + * full block; pads writes to RECORDSIZE + */ +static int stream_blocked(const unsigned char *sha1) +{ + struct git_istream *st; + enum object_type type; + unsigned long sz; + char buf[BLOCKSIZE]; + ssize_t readlen; + + st = open_istream(sha1, &type, &sz, NULL); + if (!st) + return error("cannot stream blob %s", sha1_to_hex(sha1)); + for (;;) { + readlen = read_istream(st, buf, sizeof(buf)); + if (readlen <= 0) + break; + do_write_blocked(buf, readlen); + } + close_istream(st); + if (!readlen) + finish_record(); + return readlen; +} + /* * pax extended header records have the format "%u %s=%s\n". %u contains * the size of the whole string (including the %u), the first %s is the @@ -203,7 +241,11 @@ static int write_tar_entry(struct archiver_args *args, } else memcpy(header.name, path, pathlen); - if (S_ISLNK(mode) || S_ISREG(mode)) { + if (S_ISREG(mode) && !args->convert && + sha1_object_info(sha1, &size) == OBJ_BLOB && + size > big_file_threshold) + buffer = NULL; + else if (S_ISLNK(mode) || S_ISREG(mode)) { enum object_type type; buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size); if (!buffer) @@ -235,8 +277,12 @@ static int write_tar_entry(struct archiver_args *args, } strbuf_release(&ext_header); write_blocked(&header, sizeof(header)); - if (S_ISREG(mode) && buffer && size > 0) - write_blocked(buffer, size); + if (S_ISREG(mode) && size > 0) { + if (buffer) + write_blocked(buffer, size); + else + err = stream_blocked(sha1); + } free(buffer); return err; } diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 4d127f19b7..fe475542f5 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -134,4 +134,8 @@ test_expect_success 'repack' ' git repack -ad ' +test_expect_success 'tar achiving' ' + git archive --format=tar HEAD >/dev/null +' + test_done diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 
527c9e7548..d9b997f5d3 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -84,6 +84,12 @@ test_expect_success \ 'git archive vs. git tar-tree' \ 'test_cmp b.tar b2.tar' +test_expect_success 'git archive on large files' ' + test_config core.bigfilethreshold 1 && + git archive HEAD >b3.tar && + test_cmp b.tar b3.tar +' + test_expect_success \ 'git archive in a bare repo' \ '(cd bare.git && git archive HEAD) >b3.tar' -- cgit v1.2.3 From 60df6bd19ad40e3eae2926f3785a63e670c150ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:05 +0700 Subject: archive-zip: remove uncompressed_size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need size and compressed_size. Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 716cc42710..400ba38c7d 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -129,7 +129,6 @@ static int write_zip_entry(struct archiver_args *args, struct zip_dir_header dirent; unsigned long attr2; unsigned long compressed_size; - unsigned long uncompressed_size; unsigned long crc; unsigned long direntsize; int method; @@ -149,7 +148,7 @@ static int write_zip_entry(struct archiver_args *args, method = 0; attr2 = 16; out = NULL; - uncompressed_size = 0; + size = 0; compressed_size = 0; buffer = NULL; size = 0; @@ -166,7 +165,6 @@ static int write_zip_entry(struct archiver_args *args, method = 8; crc = crc32(crc, buffer, size); out = buffer; - uncompressed_size = size; compressed_size = size; } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, @@ -204,7 +202,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.mdate, zip_date); copy_le32(dirent.crc32, crc); copy_le32(dirent.compressed_size, compressed_size); - copy_le32(dirent.size, uncompressed_size); + 
copy_le32(dirent.size, size); copy_le16(dirent.filename_length, pathlen); copy_le16(dirent.extra_length, 0); copy_le16(dirent.comment_length, 0); @@ -226,7 +224,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(header.mdate, zip_date); copy_le32(header.crc32, crc); copy_le32(header.compressed_size, compressed_size); - copy_le32(header.size, uncompressed_size); + copy_le32(header.size, size); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); -- cgit v1.2.3 From ebf5374afa87afa334b040faec35144c2a3d03d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:06 +0700 Subject: archive-zip: factor out helpers for writing sizes and CRC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We're going to reuse them soon for streaming. Also, update the ZIP directory only at the very end, which will also make streaming easier. Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 400ba38c7d..678569ab2c 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -120,6 +120,26 @@ static void *zlib_deflate(void *data, unsigned long size, return buffer; } +static void set_zip_dir_data_desc(struct zip_dir_header *header, + unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + copy_le32(header->crc32, crc); + copy_le32(header->compressed_size, compressed_size); + copy_le32(header->size, size); +} + +static void set_zip_header_data_desc(struct zip_local_header *header, + unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + copy_le32(header->crc32, crc); + copy_le32(header->compressed_size, compressed_size); + copy_le32(header->size, size); +} + static int 
write_zip_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, @@ -200,9 +220,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.compression_method, method); copy_le16(dirent.mtime, zip_time); copy_le16(dirent.mdate, zip_date); - copy_le32(dirent.crc32, crc); - copy_le32(dirent.compressed_size, compressed_size); - copy_le32(dirent.size, size); + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); copy_le16(dirent.filename_length, pathlen); copy_le16(dirent.extra_length, 0); copy_le16(dirent.comment_length, 0); @@ -210,11 +228,6 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.attr1, 0); copy_le32(dirent.attr2, attr2); copy_le32(dirent.offset, zip_offset); - memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); - zip_dir_offset += ZIP_DIR_HEADER_SIZE; - memcpy(zip_dir + zip_dir_offset, path, pathlen); - zip_dir_offset += pathlen; - zip_dir_entries++; copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); @@ -222,9 +235,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - copy_le32(header.crc32, crc); - copy_le32(header.compressed_size, compressed_size); - copy_le32(header.size, size); + set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); @@ -239,6 +250,12 @@ static int write_zip_entry(struct archiver_args *args, free(deflated); free(buffer); + memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE); + zip_dir_offset += ZIP_DIR_HEADER_SIZE; + memcpy(zip_dir + zip_dir_offset, path, pathlen); + zip_dir_offset += pathlen; + zip_dir_entries++; + return 0; } -- cgit v1.2.3 From 2158f883d99a92f801534c91294305ccbe171f79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= 
Date: Thu, 3 May 2012 08:51:07 +0700 Subject: archive-zip: streaming for stored files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Write a data descriptor containing the CRC of the entry and its sizes after streaming it out. For simplicity, do that only if we're storing files (option -0) for now. t5000 verifies output. t1050 makes sure the command always respects core.bigfilethreshold Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++------- t/t1050-large.sh | 4 +++ t/t5000-tar-tree.sh | 6 ++++ 3 files changed, 88 insertions(+), 12 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 678569ab2c..1c6c39d42c 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -3,6 +3,7 @@ */ #include "cache.h" #include "archive.h" +#include "streaming.h" static int zip_date; static int zip_time; @@ -15,6 +16,7 @@ static unsigned int zip_dir_offset; static unsigned int zip_dir_entries; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) +#define ZIP_STREAM (8) struct zip_local_header { unsigned char magic[4]; @@ -31,6 +33,14 @@ struct zip_local_header { unsigned char _end[1]; }; +struct zip_data_desc { + unsigned char magic[4]; + unsigned char crc32[4]; + unsigned char compressed_size[4]; + unsigned char size[4]; + unsigned char _end[1]; +}; + struct zip_dir_header { unsigned char magic[4]; unsigned char creator_version[2]; @@ -70,6 +80,7 @@ struct zip_dir_trailer { * we're interested in. 
*/ #define ZIP_LOCAL_HEADER_SIZE offsetof(struct zip_local_header, _end) +#define ZIP_DATA_DESC_SIZE offsetof(struct zip_data_desc, _end) #define ZIP_DIR_HEADER_SIZE offsetof(struct zip_dir_header, _end) #define ZIP_DIR_TRAILER_SIZE offsetof(struct zip_dir_trailer, _end) @@ -120,6 +131,19 @@ static void *zlib_deflate(void *data, unsigned long size, return buffer; } +static void write_zip_data_desc(unsigned long size, + unsigned long compressed_size, + unsigned long crc) +{ + struct zip_data_desc trailer; + + copy_le32(trailer.magic, 0x08074b50); + copy_le32(trailer.crc32, crc); + copy_le32(trailer.compressed_size, compressed_size); + copy_le32(trailer.size, size); + write_or_die(1, &trailer, ZIP_DATA_DESC_SIZE); +} + static void set_zip_dir_data_desc(struct zip_dir_header *header, unsigned long size, unsigned long compressed_size, @@ -140,6 +164,8 @@ static void set_zip_header_data_desc(struct zip_local_header *header, copy_le32(header->size, size); } +#define STREAM_BUFFER_SIZE (1024 * 16) + static int write_zip_entry(struct archiver_args *args, const unsigned char *sha1, const char *path, size_t pathlen, @@ -155,6 +181,8 @@ static int write_zip_entry(struct archiver_args *args, unsigned char *out; void *deflated = NULL; void *buffer; + struct git_istream *stream = NULL; + unsigned long flags = 0; unsigned long size; crc = crc32(0, NULL, 0); @@ -173,25 +201,38 @@ static int write_zip_entry(struct archiver_args *args, buffer = NULL; size = 0; } else if (S_ISREG(mode) || S_ISLNK(mode)) { - enum object_type type; - buffer = sha1_file_to_archive(args, path, sha1, mode, &type, &size); - if (!buffer) - return error("cannot read %s", sha1_to_hex(sha1)); + enum object_type type = sha1_object_info(sha1, &size); method = 0; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? 
((mode) << 16) : 0; - if (S_ISREG(mode) && args->compression_level != 0) + if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; - crc = crc32(crc, buffer, size); - out = buffer; compressed_size = size; + + if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && + size > big_file_threshold && method == 0) { + stream = open_istream(sha1, &type, &size, NULL); + if (!stream) + return error("cannot stream blob %s", + sha1_to_hex(sha1)); + flags |= ZIP_STREAM; + out = buffer = NULL; + } else { + buffer = sha1_file_to_archive(args, path, sha1, mode, + &type, &size); + if (!buffer) + return error("cannot read %s", + sha1_to_hex(sha1)); + crc = crc32(crc, buffer, size); + out = buffer; + } } else { return error("unsupported file mode: 0%o (SHA1: %s)", mode, sha1_to_hex(sha1)); } - if (method == 8) { + if (buffer && method == 8) { deflated = zlib_deflate(buffer, size, args->compression_level, &compressed_size); if (deflated && compressed_size - 6 < size) { @@ -216,7 +257,7 @@ static int write_zip_entry(struct archiver_args *args, copy_le16(dirent.creator_version, S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 
0x0317 : 0); copy_le16(dirent.version, 10); - copy_le16(dirent.flags, 0); + copy_le16(dirent.flags, flags); copy_le16(dirent.compression_method, method); copy_le16(dirent.mtime, zip_time); copy_le16(dirent.mdate, zip_date); @@ -231,18 +272,43 @@ static int write_zip_entry(struct archiver_args *args, copy_le32(header.magic, 0x04034b50); copy_le16(header.version, 10); - copy_le16(header.flags, 0); + copy_le16(header.flags, flags); copy_le16(header.compression_method, method); copy_le16(header.mtime, zip_time); copy_le16(header.mdate, zip_date); - set_zip_header_data_desc(&header, size, compressed_size, crc); + if (flags & ZIP_STREAM) + set_zip_header_data_desc(&header, 0, 0, 0); + else + set_zip_header_data_desc(&header, size, compressed_size, crc); copy_le16(header.filename_length, pathlen); copy_le16(header.extra_length, 0); write_or_die(1, &header, ZIP_LOCAL_HEADER_SIZE); zip_offset += ZIP_LOCAL_HEADER_SIZE; write_or_die(1, path, pathlen); zip_offset += pathlen; - if (compressed_size > 0) { + if (stream && method == 0) { + unsigned char buf[STREAM_BUFFER_SIZE]; + ssize_t readlen; + + for (;;) { + readlen = read_istream(stream, buf, sizeof(buf)); + if (readlen <= 0) + break; + crc = crc32(crc, buf, readlen); + write_or_die(1, buf, readlen); + } + close_istream(stream); + if (readlen) + return readlen; + + compressed_size = size; + zip_offset += compressed_size; + + write_zip_data_desc(size, compressed_size, crc); + zip_offset += ZIP_DATA_DESC_SIZE; + + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); + } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); zip_offset += compressed_size; } diff --git a/t/t1050-large.sh b/t/t1050-large.sh index fe475542f5..9db54b56bc 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -138,4 +138,8 @@ test_expect_success 'tar achiving' ' git archive --format=tar HEAD >/dev/null ' +test_expect_success 'zip achiving, store only' ' + git archive --format=zip -0 HEAD >/dev/null +' + test_done diff --git 
a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index d9b997f5d3..3b54c38621 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -244,6 +244,12 @@ test_expect_success UNZIP \ 'validate file contents with prefix' \ 'diff -r a e/prefix/a' +test_expect_success UNZIP 'git archive -0 --format=zip on large files' ' + test_config core.bigfilethreshold 1 && + git archive -0 --format=zip HEAD >large.zip && + (mkdir large && cd large && $UNZIP ../large.zip) +' + test_expect_success \ 'git archive --list outside of a git repo' \ 'GIT_DIR=some/non-existing/directory git archive --list' -- cgit v1.2.3 From c743c21591f9433fe784ac38902872701ce2e850 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 08:51:08 +0700 Subject: archive-zip: streaming for deflated files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After an entry has been streamed out, its CRC and sizes are written as part of a data descriptor. For simplicity, we make the buffer for the compressed chunks twice as big as for the uncompressed ones, to be sure the result fits in even if deflate makes them bigger. t5000 verifies output.
t1050 makes sure the command always respects core.bigfilethreshold Signed-off-by: Rene Scharfe Signed-off-by: Nguyễn Thái Ngọc Duy Signed-off-by: Junio C Hamano --- archive-zip.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++- t/t1050-large.sh | 4 ++++ t/t5000-tar-tree.sh | 7 ++++++ 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/archive-zip.c b/archive-zip.c index 1c6c39d42c..f5af81f904 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -211,7 +211,7 @@ static int write_zip_entry(struct archiver_args *args, compressed_size = size; if (S_ISREG(mode) && type == OBJ_BLOB && !args->convert && - size > big_file_threshold && method == 0) { + size > big_file_threshold) { stream = open_istream(sha1, &type, &size, NULL); if (!stream) return error("cannot stream blob %s", @@ -307,6 +307,68 @@ static int write_zip_entry(struct archiver_args *args, write_zip_data_desc(size, compressed_size, crc); zip_offset += ZIP_DATA_DESC_SIZE; + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); + } else if (stream && method == 8) { + unsigned char buf[STREAM_BUFFER_SIZE]; + ssize_t readlen; + git_zstream zstream; + int result; + size_t out_len; + unsigned char compressed[STREAM_BUFFER_SIZE * 2]; + + memset(&zstream, 0, sizeof(zstream)); + git_deflate_init(&zstream, args->compression_level); + + compressed_size = 0; + zstream.next_out = compressed; + zstream.avail_out = sizeof(compressed); + + for (;;) { + readlen = read_istream(stream, buf, sizeof(buf)); + if (readlen <= 0) + break; + crc = crc32(crc, buf, readlen); + + zstream.next_in = buf; + zstream.avail_in = readlen; + result = git_deflate(&zstream, 0); + if (result != Z_OK) + die("deflate error (%d)", result); + out = compressed; + if (!compressed_size) + out += 2; + out_len = zstream.next_out - out; + + if (out_len > 0) { + write_or_die(1, out, out_len); + compressed_size += out_len; + zstream.next_out = compressed; + zstream.avail_out = sizeof(compressed); + } + + } + close_istream(stream); + if 
(readlen) + return readlen; + + zstream.next_in = buf; + zstream.avail_in = 0; + result = git_deflate(&zstream, Z_FINISH); + if (result != Z_STREAM_END) + die("deflate error (%d)", result); + + git_deflate_end(&zstream); + out = compressed; + if (!compressed_size) + out += 2; + out_len = zstream.next_out - out - 4; + write_or_die(1, out, out_len); + compressed_size += out_len; + zip_offset += compressed_size; + + write_zip_data_desc(size, compressed_size, crc); + zip_offset += ZIP_DATA_DESC_SIZE; + set_zip_dir_data_desc(&dirent, size, compressed_size, crc); } else if (compressed_size > 0) { write_or_die(1, out, compressed_size); diff --git a/t/t1050-large.sh b/t/t1050-large.sh index 9db54b56bc..55ed955cef 100755 --- a/t/t1050-large.sh +++ b/t/t1050-large.sh @@ -142,4 +142,8 @@ test_expect_success 'zip achiving, store only' ' git archive --format=zip -0 HEAD >/dev/null ' +test_expect_success 'zip achiving, deflate' ' + git archive --format=zip HEAD >/dev/null +' + test_done diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 3b54c38621..94f2ebac5f 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -250,6 +250,13 @@ test_expect_success UNZIP 'git archive -0 --format=zip on large files' ' (mkdir large && cd large && $UNZIP ../large.zip) ' +test_expect_success UNZIP 'git archive --format=zip on large files' ' + test_config core.bigfilethreshold 1 && + git archive --format=zip HEAD >large-compressed.zip && + (mkdir large-compressed && cd large-compressed && $UNZIP ../large-compressed.zip) && + test_cmp large-compressed/a/bin/sh large/a/bin/sh +' + test_expect_success \ 'git archive --list outside of a git repo' \ 'GIT_DIR=some/non-existing/directory git archive --list' -- cgit v1.2.3 From 2dd42334dea6619c0774511beda9a02642088f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Thu, 3 May 2012 10:52:16 +0200 Subject: t5000: rationalize unzip tests Factor out a function for checking the contents of ZIP archives. 
It extracts their contents and compares them to the original files. This removes some duplicate code. Tests that just create archives can lose their UNZIP prerequisite. Signed-off-by: Rene Scharfe Signed-off-by: Junio C Hamano --- t/t5000-tar-tree.sh | 79 ++++++++++++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 40 deletions(-) diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 94f2ebac5f..ecf00edab2 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -31,6 +31,26 @@ GUNZIP=${GUNZIP:-gzip -d} SUBSTFORMAT=%H%n +check_zip() { + zipfile=$1.zip + listfile=$1.lst + dir=$1 + dir_with_prefix=$dir/$2 + + test_expect_success UNZIP " extract ZIP archive" " + (mkdir $dir && cd $dir && $UNZIP ../$zipfile) + " + + test_expect_success UNZIP " validate filenames" " + (cd ${dir_with_prefix}a && find .) | sort >$listfile && + test_cmp a.lst $listfile + " + + test_expect_success UNZIP " validate file contents" " + diff -r a ${dir_with_prefix}a + " +} + test_expect_success \ 'populate workdir' \ 'mkdir a b c && @@ -181,10 +201,19 @@ test_expect_success \ test_cmp a/substfile2 g/prefix/a/substfile2 ' +$UNZIP -v >/dev/null 2>&1 +if [ $? -eq 127 ]; then + say "Skipping ZIP tests, because unzip was not found" +else + test_set_prereq UNZIP +fi + test_expect_success \ 'git archive --format=zip' \ 'git archive --format=zip HEAD >d.zip' +check_zip d + test_expect_success \ 'git archive --format=zip in a bare repo' \ '(cd bare.git && git archive --format=zip HEAD) >d1.zip' @@ -207,55 +236,25 @@ test_expect_success 'git archive with --output, override inferred format' ' test_cmp b.tar d4.zip ' -$UNZIP -v >/dev/null 2>&1 -if [ $? -eq 127 ]; then - say "Skipping ZIP tests, because unzip was not found" -else - test_set_prereq UNZIP -fi - -test_expect_success UNZIP \ - 'extract ZIP archive' \ - '(mkdir d && cd d && $UNZIP ../d.zip)' - -test_expect_success UNZIP \ - 'validate filenames' \ - '(cd d/a && find .) 
| sort >d.lst && - test_cmp a.lst d.lst' - -test_expect_success UNZIP \ - 'validate file contents' \ - 'diff -r a d/a' - test_expect_success \ 'git archive --format=zip with prefix' \ 'git archive --format=zip --prefix=prefix/ HEAD >e.zip' -test_expect_success UNZIP \ - 'extract ZIP archive with prefix' \ - '(mkdir e && cd e && $UNZIP ../e.zip)' +check_zip e prefix/ -test_expect_success UNZIP \ - 'validate filenames with prefix' \ - '(cd e/prefix/a && find .) | sort >e.lst && - test_cmp a.lst e.lst' +test_expect_success 'git archive -0 --format=zip on large files' ' + test_config core.bigfilethreshold 1 && + git archive -0 --format=zip HEAD >large.zip +' -test_expect_success UNZIP \ - 'validate file contents with prefix' \ - 'diff -r a e/prefix/a' +check_zip large -test_expect_success UNZIP 'git archive -0 --format=zip on large files' ' - test_config core.bigfilethreshold 1 && - git archive -0 --format=zip HEAD >large.zip && - (mkdir large && cd large && $UNZIP ../large.zip) +test_expect_success 'git archive --format=zip on large files' ' + test_config core.bigfilethreshold 1 && + git archive --format=zip HEAD >large-compressed.zip ' -test_expect_success UNZIP 'git archive --format=zip on large files' ' - test_config core.bigfilethreshold 1 && - git archive --format=zip HEAD >large-compressed.zip && - (mkdir large-compressed && cd large-compressed && $UNZIP ../large-compressed.zip) && - test_cmp large-compressed/a/bin/sh large/a/bin/sh -' +check_zip large-compressed test_expect_success \ 'git archive --list outside of a git repo' \ -- cgit v1.2.3 From 72b64b44e77112b93e7b046e54c62a01e69cef3d Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:08 +0200 Subject: get_ref_dir(): use a strbuf to hold refname This simplifies the bookkeeping and allows an (artificial) restriction on refname component length to be removed. 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 54 ++++++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 28 deletions(-) diff --git a/refs.c b/refs.c index d539241d98..ed6d84230d 100644 --- a/refs.c +++ b/refs.c @@ -756,7 +756,7 @@ static void get_ref_dir(struct ref_cache *refs, const char *base, const char *path; struct dirent *de; int baselen; - char *refname; + struct strbuf refname; if (*refs->name) path = git_path_submodule(refs->name, "%s", base); @@ -768,50 +768,48 @@ static void get_ref_dir(struct ref_cache *refs, const char *base, return; baselen = strlen(base); - refname = xmalloc(baselen + 257); - - memcpy(refname, base, baselen); - if (baselen && base[baselen-1] != '/') - refname[baselen++] = '/'; + strbuf_init(&refname, baselen + 257); + strbuf_add(&refname, base, baselen); + if (baselen && base[baselen-1] != '/') { + strbuf_addch(&refname, '/'); + baselen++; + } while ((de = readdir(d)) != NULL) { unsigned char sha1[20]; struct stat st; int flag; - int namelen; const char *refdir; if (de->d_name[0] == '.') continue; - namelen = strlen(de->d_name); - if (namelen > 255) - continue; if (has_extension(de->d_name, ".lock")) continue; - memcpy(refname + baselen, de->d_name, namelen+1); + strbuf_addstr(&refname, de->d_name); refdir = *refs->name - ? git_path_submodule(refs->name, "%s", refname) - : git_path("%s", refname); - if (stat(refdir, &st) < 0) - continue; - if (S_ISDIR(st.st_mode)) { - get_ref_dir(refs, refname, dir); - continue; - } - if (*refs->name) { - hashclr(sha1); - flag = 0; - if (resolve_gitlink_ref(refs->name, refname, sha1) < 0) { + ? 
git_path_submodule(refs->name, "%s", refname.buf) + : git_path("%s", refname.buf); + if (stat(refdir, &st) < 0) { + ; /* silently ignore */ + } else if (S_ISDIR(st.st_mode)) { + get_ref_dir(refs, refname.buf, dir); + } else { + if (*refs->name) { + hashclr(sha1); + flag = 0; + if (resolve_gitlink_ref(refs->name, refname.buf, sha1) < 0) { + hashclr(sha1); + flag |= REF_ISBROKEN; + } + } else if (read_ref_full(refname.buf, sha1, 1, &flag)) { hashclr(sha1); flag |= REF_ISBROKEN; } - } else if (read_ref_full(refname, sha1, 1, &flag)) { - hashclr(sha1); - flag |= REF_ISBROKEN; + add_ref(dir, create_ref_entry(refname.buf, sha1, flag, 1)); } - add_ref(dir, create_ref_entry(refname, sha1, flag, 1)); + strbuf_setlen(&refname, baselen); } - free(refname); + strbuf_release(&refname); closedir(d); } -- cgit v1.2.3 From 66a3d20b8f8566581e8aa46e35555f353074f232 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:09 +0200 Subject: get_ref_dir(): rename "base" parameter to "dirname" Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/refs.c b/refs.c index ed6d84230d..37ea557f3e 100644 --- a/refs.c +++ b/refs.c @@ -749,30 +749,30 @@ void add_packed_ref(const char *refname, const unsigned char *sha1) create_ref_entry(refname, sha1, REF_ISPACKED, 1)); } -static void get_ref_dir(struct ref_cache *refs, const char *base, +static void get_ref_dir(struct ref_cache *refs, const char *dirname, struct ref_dir *dir) { DIR *d; const char *path; struct dirent *de; - int baselen; + int dirnamelen; struct strbuf refname; if (*refs->name) - path = git_path_submodule(refs->name, "%s", base); + path = git_path_submodule(refs->name, "%s", dirname); else - path = git_path("%s", base); + path = git_path("%s", dirname); d = opendir(path); if (!d) return; - baselen = strlen(base); - strbuf_init(&refname, baselen + 257); - strbuf_add(&refname, base, baselen); - if 
(baselen && base[baselen-1] != '/') { + dirnamelen = strlen(dirname); + strbuf_init(&refname, dirnamelen + 257); + strbuf_add(&refname, dirname, dirnamelen); + if (dirnamelen && dirname[dirnamelen-1] != '/') { strbuf_addch(&refname, '/'); - baselen++; + dirnamelen++; } while ((de = readdir(d)) != NULL) { @@ -807,7 +807,7 @@ static void get_ref_dir(struct ref_cache *refs, const char *base, } add_ref(dir, create_ref_entry(refname.buf, sha1, flag, 1)); } - strbuf_setlen(&refname, baselen); + strbuf_setlen(&refname, dirnamelen); } strbuf_release(&refname); closedir(d); -- cgit v1.2.3 From abc390989f1086aa3a8620a81f87622a78cf393b Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:10 +0200 Subject: get_ref_dir(): require that the dirname argument ends in '/' This removes some conditional code and makes it consistent with the way that direntry names are stored. Please note that this function is never used on the top-level .git directory; it is always called for directories at level .git/refs or deeper. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/refs.c b/refs.c index 37ea557f3e..113739f14f 100644 --- a/refs.c +++ b/refs.c @@ -749,13 +749,17 @@ void add_packed_ref(const char *refname, const unsigned char *sha1) create_ref_entry(refname, sha1, REF_ISPACKED, 1)); } +/* + * Read the loose references for refs from the namespace dirname. + * dirname must end with '/'. 
+ */ static void get_ref_dir(struct ref_cache *refs, const char *dirname, struct ref_dir *dir) { DIR *d; const char *path; struct dirent *de; - int dirnamelen; + int dirnamelen = strlen(dirname); struct strbuf refname; if (*refs->name) @@ -767,13 +771,8 @@ static void get_ref_dir(struct ref_cache *refs, const char *dirname, if (!d) return; - dirnamelen = strlen(dirname); strbuf_init(&refname, dirnamelen + 257); strbuf_add(&refname, dirname, dirnamelen); - if (dirnamelen && dirname[dirnamelen-1] != '/') { - strbuf_addch(&refname, '/'); - dirnamelen++; - } while ((de = readdir(d)) != NULL) { unsigned char sha1[20]; @@ -792,6 +791,7 @@ static void get_ref_dir(struct ref_cache *refs, const char *dirname, if (stat(refdir, &st) < 0) { ; /* silently ignore */ } else if (S_ISDIR(st.st_mode)) { + strbuf_addch(&refname, '/'); get_ref_dir(refs, refname.buf, dir); } else { if (*refs->name) { @@ -816,7 +816,7 @@ static void get_ref_dir(struct ref_cache *refs, const char *dirname, static struct ref_dir *get_loose_refs(struct ref_cache *refs) { if (!refs->did_loose) { - get_ref_dir(refs, "refs", &refs->loose); + get_ref_dir(refs, "refs/", &refs->loose); refs->did_loose = 1; } return &refs->loose; -- cgit v1.2.3 From f348ac923c9f834c3cdc434e6266872cf5710b71 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:11 +0200 Subject: refs.c: extract function search_for_subdir() Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/refs.c b/refs.c index 113739f14f..9471b1d8a6 100644 --- a/refs.c +++ b/refs.c @@ -276,6 +276,27 @@ static struct ref_entry *search_ref_dir(struct ref_dir *dir, const char *refname return *r; } +/* + * Search for a directory entry directly within dir (without + * recursing). Sort dir if necessary. subdirname must be a directory + * name (i.e., end in '/'). 
If mkdir is set, then create the + * directory if it is missing; otherwise, return NULL if the desired + * directory cannot be found. + */ +static struct ref_entry *search_for_subdir(struct ref_dir *dir, + const char *subdirname, int mkdir) +{ + struct ref_entry *entry = search_ref_dir(dir, subdirname); + if (!entry) { + if (!mkdir) + return NULL; + entry = create_dir_entry(subdirname); + add_entry_to_dir(dir, entry); + } + assert(entry->flag & REF_DIR); + return entry; +} + /* * If refname is a reference name, find the ref_dir within the dir * tree that should hold refname. If refname is a directory name @@ -294,17 +315,10 @@ static struct ref_dir *find_containing_dir(struct ref_dir *dir, for (slash = strchr(refname_copy, '/'); slash; slash = strchr(slash + 1, '/')) { char tmp = slash[1]; slash[1] = '\0'; - entry = search_ref_dir(dir, refname_copy); - if (!entry) { - if (!mkdir) { - dir = NULL; - break; - } - entry = create_dir_entry(refname_copy); - add_entry_to_dir(dir, entry); - } + entry = search_for_subdir(dir, refname_copy, mkdir); slash[1] = tmp; - assert(entry->flag & REF_DIR); + if (!entry) + break; dir = &entry->u.subdir; } -- cgit v1.2.3 From 9f2fb4a3737c86e245ce365f6ad8f901ad397d6f Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:12 +0200 Subject: get_ref_dir(): take the containing directory as argument Previously, the "dir" argument to get_ref_dir() was a pointer to the top-level ref_dir. Change the function to expect a pointer to the ref_dir corresponding to dirname. This allows entries to be added directly to dir, without having to recurse through the reference trie each time (i.e., we can use add_entry_to_dir() instead of add_ref()). 
Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index 9471b1d8a6..afd0cf75bc 100644 --- a/refs.c +++ b/refs.c @@ -765,7 +765,8 @@ void add_packed_ref(const char *refname, const unsigned char *sha1) /* * Read the loose references for refs from the namespace dirname. - * dirname must end with '/'. + * dirname must end with '/'. dir must be the directory entry + * corresponding to dirname. */ static void get_ref_dir(struct ref_cache *refs, const char *dirname, struct ref_dir *dir) @@ -806,7 +807,8 @@ static void get_ref_dir(struct ref_cache *refs, const char *dirname, ; /* silently ignore */ } else if (S_ISDIR(st.st_mode)) { strbuf_addch(&refname, '/'); - get_ref_dir(refs, refname.buf, dir); + get_ref_dir(refs, refname.buf, + &search_for_subdir(dir, refname.buf, 1)->u.subdir); } else { if (*refs->name) { hashclr(sha1); @@ -819,7 +821,8 @@ static void get_ref_dir(struct ref_cache *refs, const char *dirname, hashclr(sha1); flag |= REF_ISBROKEN; } - add_ref(dir, create_ref_entry(refname.buf, sha1, flag, 1)); + add_entry_to_dir(dir, + create_ref_entry(refname.buf, sha1, flag, 1)); } strbuf_setlen(&refname, dirnamelen); } @@ -830,7 +833,8 @@ static void get_ref_dir(struct ref_cache *refs, const char *dirname, static struct ref_dir *get_loose_refs(struct ref_cache *refs) { if (!refs->did_loose) { - get_ref_dir(refs, "refs/", &refs->loose); + get_ref_dir(refs, "refs/", + &search_for_subdir(&refs->loose, "refs/", 1)->u.subdir); refs->did_loose = 1; } return &refs->loose; -- cgit v1.2.3 From 93c603fcb7bfbe3d4bfb108463166b850de638f3 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:13 +0200 Subject: do_for_each_reflog(): return early on error Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 70 +++++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 35 insertions(+), 35 deletions(-) 
diff --git a/refs.c b/refs.c index afd0cf75bc..8c39bd76e5 100644 --- a/refs.c +++ b/refs.c @@ -2246,47 +2246,47 @@ static int do_for_each_reflog(const char *base, each_ref_fn fn, void *cb_data) { DIR *d = opendir(git_path("logs/%s", base)); int retval = 0; + struct dirent *de; + int baselen; + char *log; - if (d) { - struct dirent *de; - int baselen = strlen(base); - char *log = xmalloc(baselen + 257); + if (!d) + return *base ? errno : 0; - memcpy(log, base, baselen); - if (baselen && base[baselen-1] != '/') - log[baselen++] = '/'; + baselen = strlen(base); + log = xmalloc(baselen + 257); + memcpy(log, base, baselen); + if (baselen && base[baselen-1] != '/') + log[baselen++] = '/'; - while ((de = readdir(d)) != NULL) { - struct stat st; - int namelen; + while ((de = readdir(d)) != NULL) { + struct stat st; + int namelen; - if (de->d_name[0] == '.') - continue; - namelen = strlen(de->d_name); - if (namelen > 255) - continue; - if (has_extension(de->d_name, ".lock")) - continue; - memcpy(log + baselen, de->d_name, namelen+1); - if (stat(git_path("logs/%s", log), &st) < 0) - continue; - if (S_ISDIR(st.st_mode)) { - retval = do_for_each_reflog(log, fn, cb_data); - } else { - unsigned char sha1[20]; - if (read_ref_full(log, sha1, 0, NULL)) - retval = error("bad ref for %s", log); - else - retval = fn(log, sha1, 0, cb_data); - } - if (retval) - break; + if (de->d_name[0] == '.') + continue; + namelen = strlen(de->d_name); + if (namelen > 255) + continue; + if (has_extension(de->d_name, ".lock")) + continue; + memcpy(log + baselen, de->d_name, namelen+1); + if (stat(git_path("logs/%s", log), &st) < 0) + continue; + if (S_ISDIR(st.st_mode)) { + retval = do_for_each_reflog(log, fn, cb_data); + } else { + unsigned char sha1[20]; + if (read_ref_full(log, sha1, 0, NULL)) + retval = error("bad ref for %s", log); + else + retval = fn(log, sha1, 0, cb_data); } - free(log); - closedir(d); + if (retval) + break; } - else if (*base) - return errno; + free(log); + closedir(d); 
return retval; } -- cgit v1.2.3 From 989c0e5d02b1844b44e5ea2ff61a2cbd2f054a25 Mon Sep 17 00:00:00 2001 From: Michael Haggerty Date: Wed, 25 Apr 2012 00:45:14 +0200 Subject: do_for_each_reflog(): use a strbuf to hold logfile name This simplifies the bookkeeping and allows an (artificial) restriction on refname component length to be removed. Signed-off-by: Michael Haggerty Signed-off-by: Junio C Hamano --- refs.c | 60 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/refs.c b/refs.c index 8c39bd76e5..0d81b9e267 100644 --- a/refs.c +++ b/refs.c @@ -2242,57 +2242,59 @@ int for_each_reflog_ent(const char *refname, each_reflog_ent_fn fn, void *cb_dat return for_each_recent_reflog_ent(refname, fn, 0, cb_data); } -static int do_for_each_reflog(const char *base, each_ref_fn fn, void *cb_data) +/* + * Call fn for each reflog in the namespace indicated by name. name + * must be empty or end with '/'. Name will be used as a scratch + * space, but its contents will be restored before return. + */ +static int do_for_each_reflog(struct strbuf *name, each_ref_fn fn, void *cb_data) { - DIR *d = opendir(git_path("logs/%s", base)); + DIR *d = opendir(git_path("logs/%s", name->buf)); int retval = 0; struct dirent *de; - int baselen; - char *log; + int oldlen = name->len; if (!d) - return *base ? errno : 0; - - baselen = strlen(base); - log = xmalloc(baselen + 257); - memcpy(log, base, baselen); - if (baselen && base[baselen-1] != '/') - log[baselen++] = '/'; + return name->len ? 
errno : 0; while ((de = readdir(d)) != NULL) { struct stat st; - int namelen; if (de->d_name[0] == '.') continue; - namelen = strlen(de->d_name); - if (namelen > 255) - continue; if (has_extension(de->d_name, ".lock")) continue; - memcpy(log + baselen, de->d_name, namelen+1); - if (stat(git_path("logs/%s", log), &st) < 0) - continue; - if (S_ISDIR(st.st_mode)) { - retval = do_for_each_reflog(log, fn, cb_data); + strbuf_addstr(name, de->d_name); + if (stat(git_path("logs/%s", name->buf), &st) < 0) { + ; /* silently ignore */ } else { - unsigned char sha1[20]; - if (read_ref_full(log, sha1, 0, NULL)) - retval = error("bad ref for %s", log); - else - retval = fn(log, sha1, 0, cb_data); + if (S_ISDIR(st.st_mode)) { + strbuf_addch(name, '/'); + retval = do_for_each_reflog(name, fn, cb_data); + } else { + unsigned char sha1[20]; + if (read_ref_full(name->buf, sha1, 0, NULL)) + retval = error("bad ref for %s", name->buf); + else + retval = fn(name->buf, sha1, 0, cb_data); + } + if (retval) + break; } - if (retval) - break; + strbuf_setlen(name, oldlen); } - free(log); closedir(d); return retval; } int for_each_reflog(each_ref_fn fn, void *cb_data) { - return do_for_each_reflog("", fn, cb_data); + int retval; + struct strbuf name; + strbuf_init(&name, PATH_MAX); + retval = do_for_each_reflog(