diff options
-rw-r--r-- | Documentation/config.txt | 28 | ||||
-rw-r--r-- | builtin-pack-objects.c | 84 | ||||
-rw-r--r-- | builtin-verify-pack.c | 1 | ||||
-rw-r--r-- | cache.h | 28 | ||||
-rw-r--r-- | config.c | 17 | ||||
-rw-r--r-- | diff.c | 4 | ||||
-rw-r--r-- | environment.c | 2 | ||||
-rw-r--r-- | git-compat-util.h | 62 | ||||
-rw-r--r-- | pack-check.c | 64 | ||||
-rw-r--r-- | read-cache.c | 10 | ||||
-rw-r--r-- | refs.c | 2 | ||||
-rw-r--r-- | sha1_file.c | 435 | ||||
-rwxr-xr-x | t/t5301-sliding-window.sh | 60 | ||||
-rw-r--r-- | write_or_die.c | 16 |
14 files changed, 582 insertions, 231 deletions
diff --git a/Documentation/config.txt b/Documentation/config.txt index 4318bf9334..b4aae0d0ae 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -118,6 +118,34 @@ core.legacyheaders:: database directly (where the "http://" and "rsync://" protocols count as direct access). +core.packedGitWindowSize:: + Number of bytes of a pack file to map into memory in a + single mapping operation. Larger window sizes may allow + your system to process a smaller number of large pack files + more quickly. Smaller window sizes will negatively affect + performance due to increased calls to the operating system's + memory manager, but may improve performance when accessing + a large number of large pack files. ++ +Default is 1 MiB if NO_MMAP was set at compile time, otherwise 32 +MiB on 32 bit platforms and 1 GiB on 64 bit platforms. This should +be reasonable for all users/operating systems. You probably do +not need to adjust this value. ++ +Common unit suffixes of 'k', 'm', or 'g' are supported. + +core.packedGitLimit:: + Maximum number of bytes to map simultaneously into memory + from pack files. If Git needs to access more than this many + bytes at once to complete an operation it will unmap existing + regions to reclaim virtual address space within the process. ++ +Default is 256 MiB on 32 bit platforms and 8 GiB on 64 bit platforms. +This should be reasonable for all users/operating systems, except on +the largest projects. You probably do not need to adjust this value. ++ +Common unit suffixes of 'k', 'm', or 'g' are supported. + alias.*:: Command aliases for the gitlink:git[1] command wrapper - e.g. 
after defining "alias.last = cat-file commit HEAD", the invocation diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 9e15beb3ba..42dd8c87a2 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -276,7 +276,52 @@ static int encode_header(enum object_type type, unsigned long size, unsigned cha * we are going to reuse the existing object data as is. make * sure it is not corrupt. */ -static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect) +static int check_pack_inflate(struct packed_git *p, + struct pack_window **w_curs, + unsigned long offset, + unsigned long len, + unsigned long expect) +{ + z_stream stream; + unsigned char fakebuf[4096], *in; + int st; + + memset(&stream, 0, sizeof(stream)); + inflateInit(&stream); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + stream.next_out = fakebuf; + stream.avail_out = sizeof(fakebuf); + st = inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); + inflateEnd(&stream); + return (st == Z_STREAM_END && + stream.total_out == expect && + stream.total_in == len) ? 
0 : -1; +} + +static void copy_pack_data(struct sha1file *f, + struct packed_git *p, + struct pack_window **w_curs, + unsigned long offset, + unsigned long len) +{ + unsigned char *in; + unsigned int avail; + + while (len) { + in = use_pack(p, w_curs, offset, &avail); + if (avail > len) + avail = len; + sha1write(f, in, avail); + offset += avail; + len -= avail; + } +} + +static int check_loose_inflate(unsigned char *data, unsigned long len, unsigned long expect) { z_stream stream; unsigned char fakebuf[4096]; @@ -323,7 +368,7 @@ static int revalidate_loose_object(struct object_entry *entry, return -1; map += used; mapsize -= used; - return check_inflate(map, mapsize, size); + return check_loose_inflate(map, mapsize, size); } static unsigned long write_object(struct sha1file *f, @@ -416,6 +461,8 @@ static unsigned long write_object(struct sha1file *f, } else { struct packed_git *p = entry->in_pack; + struct pack_window *w_curs = NULL; + unsigned long offset; if (entry->delta) { obj_type = (allow_ofs_delta && entry->delta->offset) ? 
@@ -437,16 +484,14 @@ static unsigned long write_object(struct sha1file *f, hdrlen += 20; } - use_packed_git(p); - buf = (char *) p->pack_base - + entry->in_pack_offset - + entry->in_pack_header_size; + offset = entry->in_pack_offset + entry->in_pack_header_size; datalen = find_packed_object_size(p, entry->in_pack_offset) - entry->in_pack_header_size; - if (!pack_to_stdout && check_inflate(buf, datalen, entry->size)) + if (!pack_to_stdout && check_pack_inflate(p, &w_curs, + offset, datalen, entry->size)) die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); - sha1write(f, buf, datalen); - unuse_packed_git(p); + copy_pack_data(f, p, &w_curs, offset, datalen); + unuse_pack(&w_curs); reused++; } if (entry->delta) @@ -937,22 +982,19 @@ static void check_object(struct object_entry *entry) if (entry->in_pack && !entry->preferred_base) { struct packed_git *p = entry->in_pack; + struct pack_window *w_curs = NULL; unsigned long left = p->pack_size - entry->in_pack_offset; unsigned long size, used; unsigned char *buf; struct object_entry *base_entry = NULL; - use_packed_git(p); - buf = p->pack_base; - buf += entry->in_pack_offset; + buf = use_pack(p, &w_curs, entry->in_pack_offset, NULL); /* We want in_pack_type even if we do not reuse delta. * There is no point not reusing non-delta representations. */ used = unpack_object_header_gently(buf, left, &entry->in_pack_type, &size); - if (!used || left - used <= 20) - die("corrupt pack for %s", sha1_to_hex(entry->sha1)); /* Check if it is delta, and the base is also an object * we are going to pack. 
If so we will reuse the existing @@ -961,21 +1003,26 @@ static void check_object(struct object_entry *entry) if (!no_reuse_delta) { unsigned char c, *base_name; unsigned long ofs; + unsigned long used_0; /* there is at least 20 bytes left in the pack */ switch (entry->in_pack_type) { case OBJ_REF_DELTA: - base_name = buf + used; + base_name = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); used += 20; break; case OBJ_OFS_DELTA: - c = buf[used++]; + buf = use_pack(p, &w_curs, + entry->in_pack_offset + used, NULL); + used_0 = 0; + c = buf[used_0++]; ofs = c & 127; while (c & 128) { ofs += 1; if (!ofs || ofs & ~(~0UL >> 7)) die("delta base offset overflow in pack for %s", sha1_to_hex(entry->sha1)); - c = buf[used++]; + c = buf[used_0++]; ofs = (ofs << 7) + (c & 127); } if (ofs >= entry->in_pack_offset) @@ -983,6 +1030,7 @@ static void check_object(struct object_entry *entry) sha1_to_hex(entry->sha1)); ofs = entry->in_pack_offset - ofs; base_name = find_packed_object_name(p, ofs); + used += used_0; break; default: base_name = NULL; @@ -990,7 +1038,7 @@ static void check_object(struct object_entry *entry) if (base_name) base_entry = locate_object_entry(base_name); } - unuse_packed_git(p); + unuse_pack(&w_curs); entry->in_pack_header_size = used; if (base_entry) { diff --git a/builtin-verify-pack.c b/builtin-verify-pack.c index 7d39d9bcd1..4e31c273f4 100644 --- a/builtin-verify-pack.c +++ b/builtin-verify-pack.c @@ -55,6 +55,7 @@ int cmd_verify_pack(int argc, const char **argv, const char *prefix) int no_more_options = 0; int nothing_done = 1; + git_config(git_default_config); while (1 < argc) { if (!no_more_options && argv[1][0] == '-') { if (!strcmp("-v", argv[1])) @@ -197,6 +197,8 @@ extern int warn_ambiguous_refs; extern int shared_repository; extern const char *apply_default_whitespace; extern int zlib_compression_level; +extern size_t packed_git_window_size; +extern size_t packed_git_limit; #define GIT_REPO_VERSION 0 extern int 
repository_format_version; @@ -336,14 +338,22 @@ extern struct alternate_object_database { } *alt_odb_list; extern void prepare_alt_odb(void); +struct pack_window { + struct pack_window *next; + unsigned char *base; + off_t offset; + size_t len; + unsigned int last_used; + unsigned int inuse_cnt; +}; + extern struct packed_git { struct packed_git *next; - unsigned long index_size; - unsigned long pack_size; + struct pack_window *windows; unsigned int *index_base; - void *pack_base; - unsigned int pack_last_used; - unsigned int pack_use_cnt; + off_t index_size; + off_t pack_size; + int pack_fd; int pack_local; unsigned char sha1[20]; /* something like ".git/objects/pack/xxxxx.pack" */ @@ -389,13 +399,14 @@ extern void install_packed_git(struct packed_git *pack); extern struct packed_git *find_sha1_pack(const unsigned char *sha1, struct packed_git *packs); -extern int use_packed_git(struct packed_git *); -extern void unuse_packed_git(struct packed_git *); +extern void pack_report(); +extern unsigned char* use_pack(struct packed_git *, struct pack_window **, unsigned long, unsigned int *); +extern void unuse_pack(struct pack_window **); extern struct packed_git *add_packed_git(char *, int, int); extern int num_packed_objects(const struct packed_git *p); extern int nth_packed_object_sha1(const struct packed_git *, int, unsigned char*); extern unsigned long find_pack_entry_one(const unsigned char *, struct packed_git *); -extern void *unpack_entry_gently(struct packed_git *, unsigned long, char *, unsigned long *); +extern void *unpack_entry(struct packed_git *, unsigned long, char *, unsigned long *); extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern void packed_object_info_detail(struct packed_git *, unsigned long, char *, unsigned long *, unsigned long *, unsigned int *, unsigned char *); @@ -421,6 +432,7 @@ extern char *git_commit_encoding; extern char 
*git_log_output_encoding; extern int copy_fd(int ifd, int ofd); +extern void read_or_die(int fd, void *buf, size_t count); extern int write_in_full(int fd, const void *buf, size_t count, const char *); extern void write_or_die(int fd, const void *buf, size_t count); extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); @@ -304,6 +304,21 @@ int git_default_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.packedgitwindowsize")) { + int pgsz = getpagesize(); + packed_git_window_size = git_config_int(var, value); + packed_git_window_size /= pgsz; + if (packed_git_window_size < 2) + packed_git_window_size = 2; + packed_git_window_size *= pgsz; + return 0; + } + + if (!strcmp(var, "core.packedgitlimit")) { + packed_git_limit = git_config_int(var, value); + return 0; + } + if (!strcmp(var, "user.name")) { strlcpy(git_default_name, value, sizeof(git_default_name)); return 0; @@ -695,7 +710,7 @@ int git_config_set_multivar(const char* key, const char* value, } fstat(in_fd, &st); - contents = mmap(NULL, st.st_size, PROT_READ, + contents = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in_fd, 0); close(in_fd); @@ -1341,10 +1341,8 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only) fd = open(s->path, O_RDONLY); if (fd < 0) goto err_empty; - s->data = mmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0); + s->data = xmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - if (s->data == MAP_FAILED) - goto err_empty; s->should_munmap = 1; } else { diff --git a/environment.c b/environment.c index a1502c4e87..09976c7bf6 100644 --- a/environment.c +++ b/environment.c @@ -23,6 +23,8 @@ char *git_log_output_encoding; int shared_repository = PERM_UMASK; const char *apply_default_whitespace; int zlib_compression_level = Z_DEFAULT_COMPRESSION; +size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE; +size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT; int pager_in_use; int pager_use_color = 1; 
diff --git a/git-compat-util.h b/git-compat-util.h index 5d9eb2615b..e023bf1413 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -92,12 +92,21 @@ extern void set_warn_routine(void (*routine)(const char *warn, va_list params)); extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); extern int git_munmap(void *start, size_t length); +#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) + #else /* NO_MMAP */ #include <sys/mman.h> +#define DEFAULT_PACKED_GIT_WINDOW_SIZE \ + (sizeof(void*) >= 8 \ + ? 1 * 1024 * 1024 * 1024 \ + : 32 * 1024 * 1024) #endif /* NO_MMAP */ +#define DEFAULT_PACKED_GIT_LIMIT \ + ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) + #ifdef NO_SETENV #define setenv gitsetenv extern int gitsetenv(const char *, const char *, int); @@ -118,11 +127,17 @@ extern char *gitstrcasestr(const char *haystack, const char *needle); extern size_t gitstrlcpy(char *, const char *, size_t); #endif +extern void release_pack_memory(size_t); + static inline char* xstrdup(const char *str) { char *ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); + if (!ret) { + release_pack_memory(strlen(str) + 1); + ret = strdup(str); + if (!ret) + die("Out of memory, strdup failed"); + } return ret; } @@ -131,8 +146,14 @@ static inline void *xmalloc(size_t size) void *ret = malloc(size); if (!ret && !size) ret = malloc(1); - if (!ret) - die("Out of memory, malloc failed"); + if (!ret) { + release_pack_memory(size); + ret = malloc(size); + if (!ret && !size) + ret = malloc(1); + if (!ret) + die("Out of memory, malloc failed"); + } #ifdef XMALLOC_POISON memset(ret, 0xA5, size); #endif @@ -144,8 +165,14 @@ static inline void *xrealloc(void *ptr, size_t size) void *ret = realloc(ptr, size); if (!ret && !size) ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); + if (!ret) { + release_pack_memory(size); + ret = realloc(ptr, size); + if (!ret && !size) + ret = realloc(ptr, 1); + if (!ret) + 
die("Out of memory, realloc failed"); + } return ret; } @@ -154,8 +181,27 @@ static inline void *xcalloc(size_t nmemb, size_t size) void *ret = calloc(nmemb, size); if (!ret && (!nmemb || !size)) ret = calloc(1, 1); - if (!ret) - die("Out of memory, calloc failed"); + if (!ret) { + release_pack_memory(nmemb * size); + ret = calloc(nmemb, size); + if (!ret && (!nmemb || !size)) + ret = calloc(1, 1); + if (!ret) + die("Out of memory, calloc failed"); + } + return ret; +} + +static inline void *xmmap(void *start, size_t length, + int prot, int flags, int fd, off_t offset) +{ + void *ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) { + release_pack_memory(length); + ret = mmap(start, length, prot, flags, fd, offset); + if (ret == MAP_FAILED) + die("Out of memory? mmap failed: %s", strerror(errno)); + } return ret; } diff --git a/pack-check.c b/pack-check.c index 8e123b71ed..08a9fd8dc0 100644 --- a/pack-check.c +++ b/pack-check.c @@ -1,55 +1,45 @@ #include "cache.h" #include "pack.h" -#define BATCH (1u<<20) - -static int verify_packfile(struct packed_git *p) +static int verify_packfile(struct packed_git *p, + struct pack_window **w_curs) { unsigned long index_size = p->index_size; void *index_base = p->index_base; SHA_CTX ctx; unsigned char sha1[20]; - struct pack_header *hdr; + unsigned long offset = 0, pack_sig = p->pack_size - 20; int nr_objects, err, i; - unsigned char *packdata; - unsigned long datasize; - - /* Header consistency check */ - hdr = p->pack_base; - if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) - return error("Packfile %s signature mismatch", p->pack_name); - if (!pack_version_ok(hdr->hdr_version)) - return error("Packfile version %d unsupported", - ntohl(hdr->hdr_version)); - nr_objects = ntohl(hdr->hdr_entries); - if (num_packed_objects(p) != nr_objects) - return error("Packfile claims to have %d objects, " - "while idx size expects %d", nr_objects, - num_packed_objects(p)); - - /* Check integrity of pack data with its 
SHA-1 checksum */ + + /* Note that the pack header checks are actually performed by + * use_pack when it first opens the pack file. If anything + * goes wrong during those checks then the call will die out + * immediately. + */ + SHA1_Init(&ctx); - packdata = p->pack_base; - datasize = p->pack_size - 20; - while (datasize) { - unsigned long batch = (datasize < BATCH) ? datasize : BATCH; - SHA1_Update(&ctx, packdata, batch); - datasize -= batch; - packdata += batch; + while (offset < pack_sig) { + unsigned int remaining; + unsigned char *in = use_pack(p, w_curs, offset, &remaining); + offset += remaining; + if (offset > pack_sig) + remaining -= offset - pack_sig; + SHA1_Update(&ctx, in, remaining); } SHA1_Final(sha1, &ctx); - - if (hashcmp(sha1, (unsigned char *)(p->pack_base) + p->pack_size - 20)) + if (hashcmp(sha1, use_pack(p, w_curs, pack_sig, NULL))) return error("Packfile %s SHA1 mismatch with itself", p->pack_name); if (hashcmp(sha1, (unsigned char *)index_base + index_size - 40)) return error("Packfile %s SHA1 mismatch with idx", p->pack_name); + unuse_pack(w_curs); /* Make sure everything reachable from idx is valid. Since we * have verified that nr_objects matches between idx and pack, * we do not do scan-streaming check on the pack file. 
*/ + nr_objects = num_packed_objects(p); for (i = err = 0; i < nr_objects; i++) { unsigned char sha1[20]; void *data; @@ -61,7 +51,7 @@ static int verify_packfile(struct packed_git *p) offset = find_pack_entry_one(sha1, p); if (!offset) die("internal error pack-check find-pack-entry-one"); - data = unpack_entry_gently(p, offset, type, &size); + data = unpack_entry(p, offset, type, &size); if (!data) { err = error("cannot unpack %s from %s", sha1_to_hex(sha1), p->pack_name); @@ -84,12 +74,10 @@ static int verify_packfile(struct packed_git *p) static void show_pack_info(struct packed_git *p) { - struct pack_header *hdr; int nr_objects, i; unsigned int chain_histogram[MAX_CHAIN]; - hdr = p->pack_base; - nr_objects = ntohl(hdr->hdr_entries); + nr_objects = num_packed_objects(p); memset(chain_histogram, 0, sizeof(chain_histogram)); for (i = 0; i < nr_objects; i++) { @@ -152,18 +140,16 @@ int verify_pack(struct packed_git *p, int verbose) if (!ret) { /* Verify pack file */ - use_packed_git(p); - ret = verify_packfile(p); - unuse_packed_git(p); + struct pack_window *w_curs = NULL; + ret = verify_packfile(p, &w_curs); + unuse_pack(&w_curs); } if (verbose) { if (ret) printf("%s: bad\n", p->pack_name); else { - use_packed_git(p); show_pack_info(p); - unuse_packed_git(p); printf("%s: ok\n", p->pack_name); } } diff --git a/read-cache.c b/read-cache.c index b8d83ccd9f..29cf9abe64 100644 --- a/read-cache.c +++ b/read-cache.c @@ -793,16 +793,16 @@ int read_cache_from(const char *path) die("index file open failed (%s)", strerror(errno)); } - cache_mmap = MAP_FAILED; if (!fstat(fd, &st)) { cache_mmap_size = st.st_size; errno = EINVAL; if (cache_mmap_size >= sizeof(struct cache_header) + 20) - cache_mmap = mmap(NULL, cache_mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); - } + cache_mmap = xmmap(NULL, cache_mmap_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); + else + die("index file smaller than expected"); + } else + die("cannot stat the open index (%s)", 
strerror(errno)); close(fd); - if (cache_mmap == MAP_FAILED) - die("index file mmap failed (%s)", strerror(errno)); hdr = cache_mmap; if (verify_hdr(hdr, cache_mmap_size) < 0) @@ -1025,7 +1025,7 @@ int read_ref_at(const char *ref, unsigned long at_time, int cnt, unsigned char * fstat(logfd, &st); if (!st.st_size) die("Log %s is empty.", logfile); - logdata = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0); + logdata = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0); close(logfd); lastrec = NULL; diff --git a/sha1_file.c b/sha1_file.c index 1c4df5b73e..d9622d95e7 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -355,10 +355,8 @@ static void read_info_alternates(const char * relative_base, int depth) close(fd); return; } - map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - if (map == MAP_FAILED) - return; link_alt_odb_entries(map, map + st.st_size, '\n', relative_base, depth); @@ -397,11 +395,35 @@ static char *find_sha1_file(const unsigned char *sha1, struct stat *st) return NULL; } -#define PACK_MAX_SZ (1<<26) -static int pack_used_ctr; -static unsigned long pack_mapped; +static unsigned int pack_used_ctr; +static unsigned int pack_mmap_calls; +static unsigned int peak_pack_open_windows; +static unsigned int pack_open_windows; +static size_t peak_pack_mapped; +static size_t pack_mapped; +static size_t page_size; struct packed_git *packed_git; +void pack_report() +{ + fprintf(stderr, + "pack_report: getpagesize() = %10lu\n" + "pack_report: core.packedGitWindowSize = %10lu\n" + "pack_report: core.packedGitLimit = %10lu\n", + page_size, + packed_git_window_size, + packed_git_limit); + fprintf(stderr, + "pack_report: pack_used_ctr = %10u\n" + "pack_report: pack_mmap_calls = %10u\n" + "pack_report: pack_open_windows = %10u / %10u\n" + "pack_report: pack_mapped = %10lu / %10lu\n", + pack_used_ctr, + pack_mmap_calls, + pack_open_windows, peak_pack_open_windows, + 
pack_mapped, peak_pack_mapped); +} + static int check_packed_git_idx(const char *path, unsigned long *idx_size_, void **idx_map_) { @@ -418,10 +440,8 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_, return -1; } idx_size = st.st_size; - idx_map = mmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); + idx_map = xmmap(NULL, idx_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - if (idx_map == MAP_FAILED) - return -1; index = idx_map; *idx_map_ = idx_map; @@ -451,86 +471,198 @@ static int check_packed_git_idx(const char *path, unsigned long *idx_size_, return 0; } -static int unuse_one_packed_git(void) +static void scan_windows(struct packed_git *p, + struct packed_git **lru_p, + struct pack_window **lru_w, + struct pack_window **lru_l) { - struct packed_git *p, *lru = NULL; + struct pack_window *w, *w_l; - for (p = packed_git; p; p = p->next) { - if (p->pack_use_cnt || !p->pack_base) - continue; - if (!lru || p->pack_last_used < lru->pack_last_used) - lru = p; + for (w_l = NULL, w = p->windows; w; w = w->next) { + if (!w->inuse_cnt) { + if (!*lru_w || w->last_used < (*lru_w)->last_used) { + *lru_p = p; + *lru_w = w; + *lru_l = w_l; + } + } + w_l = w; } - if (!lru) - return 0; - munmap(lru->pack_base, lru->pack_size); - lru->pack_base = NULL; - return 1; } -void unuse_packed_git(struct packed_git *p) +static int unuse_one_window(struct packed_git *current) +{ + struct packed_git *p, *lru_p = NULL; + struct pack_window *lru_w = NULL, *lru_l = NULL; + + if (current) + scan_windows(current, &lru_p, &lru_w, &lru_l); + for (p = packed_git; p; p = p->next) + scan_windows(p, &lru_p, &lru_w, &lru_l); + if (lru_p) { + munmap(lru_w->base, lru_w->len); + pack_mapped -= lru_w->len; + if (lru_l) + lru_l->next = lru_w->next; + else { + lru_p->windows = lru_w->next; + if (!lru_p->windows && lru_p != current) { + close(lru_p->pack_fd); + lru_p->pack_fd = -1; + } + } + free(lru_w); + pack_open_windows--; + return 1; + } + return 0; +} + +void 
release_pack_memory(size_t need) { - p->pack_use_cnt--; + size_t cur = pack_mapped; + while (need >= (cur - pack_mapped) && unuse_one_window(NULL)) + ; /* nothing */ } -int use_packed_git(struct packed_git *p) +void unuse_pack(struct pack_window **w_cursor) { + struct pack_window *w = *w_cursor; + if (w) { + w->inuse_cnt--; + *w_cursor = NULL; + } +} + +static void open_packed_git(struct packed_git *p) +{ + struct stat st; + struct pack_header hdr; + unsigned char sha1[20]; + unsigned char *idx_sha1; + long fd_flag; + + p->pack_fd = open(p->pack_name, O_RDONLY); + if (p->pack_fd < 0 || fstat(p->pack_fd, &st)) + die("packfile %s cannot be opened", p->pack_name); + + /* If we created the struct before we had the pack we lack size. */ if (!p->pack_size) { - struct stat st; - /* We created the struct before we had the pack */ - stat(p->pack_name, &st); if (!S_ISREG(st.st_mode)) die("packfile %s not a regular file", p->pack_name); p->pack_size = st.st_size; - } - if (!p->pack_base) { - int fd; - struct stat st; - void *map; - struct pack_header *hdr; - - pack_mapped += p->pack_size; - while (PACK_MAX_SZ < pack_mapped && unuse_one_packed_git()) - ; /* nothing */ - fd = open(p->pack_name, O_RDONLY); - if (fd < 0) - die("packfile %s cannot be opened", p->pack_name); - if (fstat(fd, &st)) { - close(fd); - die("packfile %s cannot be opened", p->pack_name); - } - if (st.st_size != p->pack_size) - die("packfile %s size mismatch.", p->pack_name); - map = mmap(NULL, p->pack_size, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (map == MAP_FAILED) - die("packfile %s cannot be mapped.", p->pack_name); - p->pack_base = map; + } else if (p->pack_size != st.st_size) + die("packfile %s size changed", p->pack_name); - /* Check if we understand this pack file. If we don't we're - * likely too old to handle it. 
- */ - hdr = map; - if (hdr->hdr_signature != htonl(PACK_SIGNATURE)) - die("packfile %s isn't actually a pack.", p->pack_name); - if (!pack_version_ok(hdr->hdr_version)) - die("packfile %s is version %i and not supported" - " (try upgrading GIT to a newer version)", - p->pack_name, ntohl(hdr->hdr_version)); - - /* Check if the pack file matches with the index file. - * this is cheap. - */ - if (hashcmp((unsigned char *)(p->index_base) + - p->index_size - 40, - (unsigned char *)p->pack_base + - p->pack_size - 20)) { - die("packfile %s does not match index.", p->pack_name); + /* We leave these file descriptors open with sliding mmap; + * there is no point keeping them open across exec(), though. + */ + fd_flag = fcntl(p->pack_fd, F_GETFD, 0); + if (fd_flag < 0) + die("cannot determine file descriptor flags"); + fd_flag |= FD_CLOEXEC; + if (fcntl(p->pack_fd, F_SETFD, fd_flag) == -1) + die("cannot set FD_CLOEXEC"); + + /* Verify we recognize this pack file format. */ + read_or_die(p->pack_fd, &hdr, sizeof(hdr)); + if (hdr.hdr_signature != htonl(PACK_SIGNATURE)) + die("file %s is not a GIT packfile", p->pack_name); + if (!pack_version_ok(hdr.hdr_version)) + die("packfile %s is version %u and not supported" + " (try upgrading GIT to a newer version)", + p->pack_name, ntohl(hdr.hdr_version)); + + /* Verify the pack matches its index. 
*/ + if (num_packed_objects(p) != ntohl(hdr.hdr_entries)) + die("packfile %s claims to have %u objects" + " while index size indicates %u objects", + p->pack_name, ntohl(hdr.hdr_entries), + num_packed_objects(p)); + if (lseek(p->pack_fd, p->pack_size - sizeof(sha1), SEEK_SET) == -1) + die("end of packfile %s is unavailable", p->pack_name); + read_or_die(p->pack_fd, sha1, sizeof(sha1)); + idx_sha1 = ((unsigned char *)p->index_base) + p->index_size - 40; + if (hashcmp(sha1, idx_sha1)) + die("packfile %s does not match index", p->pack_name); +} + +static int in_window(struct pack_window *win, unsigned long offset) +{ + /* We must promise at least 20 bytes (one hash) after the + * offset is available from this window, otherwise the offset + * is not actually in this window and a different window (which + * has that one hash excess) must be used. This is to support + * the object header and delta base parsing routines below. + */ + off_t win_off = win->offset; + return win_off <= offset + && (offset + 20) <= (win_off + win->len); +} + +unsigned char* use_pack(struct packed_git *p, + struct pack_window **w_cursor, + unsigned long offset, + unsigned int *left) +{ + struct pack_window *win = *w_cursor; + + if (p->pack_fd == -1) + open_packed_git(p); + + /* Since packfiles end in a hash of their content and it's + * pointless to ask for an offset into the middle of that + * hash, and the in_window function above wouldn't match, + * don't allow an offset too close to the end of the file. 
+ */ + if (offset > (p->pack_size - 20)) + die("offset beyond end of packfile (truncated pack?)"); + + if (!win || !in_window(win, offset)) { + if (win) + win->inuse_cnt--; + for (win = p->windows; win; win = win->next) { + if (in_window(win, offset)) + break; + } + if (!win) { + if (!page_size) + page_size = getpagesize(); + win = xcalloc(1, sizeof(*win)); + win->offset = (offset / page_size) * page_size; + win->len = p->pack_size - win->offset; + if (win->len > packed_git_window_size) + win->len = packed_git_window_size; + pack_mapped += win->len; + while (packed_git_limit < pack_mapped + && unuse_one_window(p)) + ; /* nothing */ + win->base = xmmap(NULL, win->len, + PROT_READ, MAP_PRIVATE, + p->pack_fd, win->offset); + if (win->base == MAP_FAILED) + die("packfile %s cannot be mapped: %s", + p->pack_name, + strerror(errno)); + pack_mmap_calls++; + pack_open_windows++; + if (pack_mapped > peak_pack_mapped) + peak_pack_mapped = pack_mapped; + if (pack_open_windows > peak_pack_open_windows) + peak_pack_open_windows = pack_open_windows; + win->next = p->windows; + p->windows = win; } } - p->pack_last_used = pack_used_ctr++; - p->pack_use_cnt++; - return 0; + if (win != *w_cursor) { + win->last_used = pack_used_ctr++; + win->inuse_cnt++; + *w_cursor = win; + } + offset -= win->offset; + if (left) + *left = win->len - offset; + return win->base + offset; } struct packed_git *add_packed_git(char *path, int path_len, int local) @@ -559,9 +691,8 @@ struct packed_git *add_packed_git(char *path, int path_len, int local) p->pack_size = st.st_size; p->index_base = idx_map; p->next = NULL; - p->pack_base = NULL; - p->pack_last_used = 0; - p->pack_use_cnt = 0; + p->windows = NULL; + p->pack_fd = -1; p->pack_local = local; if ((path_len > 44) && !get_sha1_hex(path + path_len - 44, sha1)) hashcpy(p->sha1, sha1); @@ -592,9 +723,8 @@ struct packed_git *parse_pack_index_file(const unsigned char *sha1, char *idx_pa p->pack_size = 0; p->index_base = idx_map; p->next = NULL; - 
p->pack_base = NULL; - p->pack_last_used = 0; - p->pack_use_cnt = 0; + p->windows = NULL; + p->pack_fd = -1; hashcpy(p->sha1, sha1); return p; } @@ -705,10 +835,8 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size) */ sha1_file_open_flag = 0; } - map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + map = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - if (map == MAP_FAILED) - return NULL; *size = st.st_size; return map; } @@ -878,18 +1006,21 @@ void * unpack_sha1_file(void *map, unsigned long mapsize, char *type, unsigned l } static unsigned long get_delta_base(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, enum object_type kind, unsigned long delta_obj_offset, unsigned long *base_obj_offset) { - unsigned char *base_info = (unsigned char *) p->pack_base + offset; + unsigned char *base_info = use_pack(p, w_curs, offset, NULL); unsigned long base_offset; - /* there must be at least 20 bytes left regardless of delta type */ - if (p->pack_size <= offset + 20) - die("truncated pack file"); - + /* use_pack() assured us we have [base_info, base_info + 20) + * as a range that we can look at without walking off the + * end of the mapped window. It's actually the hash size + * that is assured. An OFS_DELTA longer than the hash size + * is stupid, as then a REF_DELTA would be smaller to store. 
+ */ if (kind == OBJ_OFS_DELTA) { unsigned used = 0; unsigned char c = base_info[used++]; @@ -923,6 +1054,7 @@ static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep); static int packed_delta_info(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, enum object_type kind, unsigned long obj_offset, @@ -931,7 +1063,8 @@ static int packed_delta_info(struct packed_git *p, { unsigned long base_offset; - offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); + offset = get_delta_base(p, w_curs, offset, kind, + obj_offset, &base_offset); /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta @@ -943,20 +1076,23 @@ static int packed_delta_info(struct packed_git *p, if (sizep) { const unsigned char *data; - unsigned char delta_head[20]; + unsigned char delta_head[20], *in; unsigned long result_size; z_stream stream; int st; memset(&stream, 0, sizeof(stream)); - - stream.next_in = (unsigned char *) p->pack_base + offset; - stream.avail_in = p->pack_size - offset; stream.next_out = delta_head; stream.avail_out = sizeof(delta_head); inflateInit(&stream); - st = inflate(&stream, Z_FINISH); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + st = inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while ((st == Z_OK || st == Z_BUF_ERROR) + && stream.total_out < sizeof(delta_head)); inflateEnd(&stream); if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) @@ -977,17 +1113,24 @@ static int packed_delta_info(struct packed_git *p, return 0; } -static unsigned long unpack_object_header(struct packed_git *p, unsigned long offset, - enum object_type *type, unsigned long *sizep) +static unsigned long unpack_object_header(struct packed_git *p, + struct pack_window **w_curs, + unsigned long offset, + enum object_type *type, + unsigned long *sizep) { + unsigned char *base; + 
unsigned int left; unsigned long used; - if (p->pack_size <= offset) - die("object offset outside of pack file"); - - used = unpack_object_header_gently((unsigned char *)p->pack_base + - offset, - p->pack_size - offset, type, sizep); + /* use_pack() assures us we have [base, base + 20) available + * as a range that we can look at. (It's actually the hash + * size that is assured.) With our object header encoding + * the maximum deflated object size is 2^137, which is just + * insane, so we know we won't exceed what we have been given. + */ + base = use_pack(p, w_curs, offset, &left); + used = unpack_object_header_gently(base, left, type, sizep); if (!used) die("object offset outside of pack file"); @@ -1002,13 +1145,14 @@ void packed_object_info_detail(struct packed_git *p, unsigned int *delta_chain_length, unsigned char *base_sha1) { + struct pack_window *w_curs = NULL; unsigned long obj_offset, val; unsigned char *next_sha1; enum object_type kind; *delta_chain_length = 0; obj_offset = offset; - offset = unpack_object_header(p, offset, &kind, size); + offset = unpack_object_header(p, &w_curs, offset, &kind, size); for (;;) { switch (kind) { @@ -1021,25 +1165,24 @@ void packed_object_info_detail(struct packed_git *p, case OBJ_TAG: strcpy(type, type_names[kind]); *store_size = 0; /* notyet */ + unuse_pack(&w_curs); return; case OBJ_OFS_DELTA: - get_delta_base(p, offset, kind, obj_offset, &offset); + get_delta_base(p, &w_curs, offset, kind, + obj_offset, &offset); if (*delta_chain_length == 0) { /* TODO: find base_sha1 as pointed by offset */ } break; case OBJ_REF_DELTA: - if (p->pack_size <= offset + 20) - die("pack file %s records an incomplete delta base", - p->pack_name); - next_sha1 = (unsigned char *) p->pack_base + offset; + next_sha1 = use_pack(p, &w_curs, offset, NULL); if (*delta_chain_length == 0) hashcpy(base_sha1, next_sha1); offset = find_pack_entry_one(next_sha1, p); break; } obj_offset = offset; - offset = unpack_object_header(p, offset, &kind, &val); 
+ offset = unpack_object_header(p, &w_curs, offset, &kind, &val); (*delta_chain_length)++; } } @@ -1047,20 +1190,26 @@ void packed_object_info_detail(struct packed_git *p, static int packed_object_info(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { + struct pack_window *w_curs = NULL; unsigned long size, obj_offset = offset; enum object_type kind; + int r; - offset = unpack_object_header(p, offset, &kind, &size); + offset = unpack_object_header(p, &w_curs, offset, &kind, &size); switch (kind) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - return packed_delta_info(p, offset, kind, obj_offset, type, sizep); + r = packed_delta_info(p, &w_curs, offset, kind, + obj_offset, type, sizep); + unuse_pack(&w_curs); + return r; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: strcpy(type, type_names[kind]); + unuse_pack(&w_curs); break; default: die("pack %s contains unknown object type %d", @@ -1072,23 +1221,27 @@ static int packed_object_info(struct packed_git *p, unsigned long offset, } static void *unpack_compressed_entry(struct packed_git *p, + struct pack_window **w_curs, unsigned long offset, unsigned long size) { int st; z_stream stream; - unsigned char *buffer; + unsigned char *buffer, *in; buffer = xmalloc(size + 1); buffer[size] = 0; memset(&stream, 0, sizeof(stream)); - stream.next_in = (unsigned char*)p->pack_base + offset; - stream.avail_in = p->pack_size - offset; stream.next_out = buffer; stream.avail_out = size; inflateInit(&stream); - st = inflate(&stream, Z_FINISH); + do { + in = use_pack(p, w_curs, offset, &stream.avail_in); + stream.next_in = in; + st = inflate(&stream, Z_FINISH); + offset += stream.next_in - in; + } while (st == Z_OK || st == Z_BUF_ERROR); inflateEnd(&stream); if ((st != Z_STREAM_END) || stream.total_out != size) { free(buffer); @@ -1099,6 +1252,7 @@ static void *unpack_compressed_entry(struct packed_git *p, } static void *unpack_delta_entry(struct packed_git *p, + struct pack_window **w_curs, 
unsigned long offset, unsigned long delta_size, enum object_type kind, @@ -1109,13 +1263,14 @@ static void *unpack_delta_entry(struct packed_git *p, void *delta_data, *result, *base; unsigned long result_size, base_size, base_offset; - offset = get_delta_base(p, offset, kind, obj_offset, &base_offset); - base = unpack_entry_gently(p, base_offset, type, &base_size); + offset = get_delta_base(p, w_curs, offset, kind, + obj_offset, &base_offset); + base = unpack_entry(p, base_offset, type, &base_size); if (!base) die("failed to read delta base object at %lu from %s", base_offset, p->pack_name); - delta_data = unpack_compressed_entry(p, offset, delta_size); + delta_data = unpack_compressed_entry(p, w_curs, offset, delta_size); result = patch_delta(base, base_size, delta_data, delta_size, &result_size); @@ -1127,43 +1282,34 @@ static void *unpack_delta_entry(struct packed_git *p, return result; } -static void *unpack_entry(struct pack_entry *entry, - char *type, unsigned long *sizep) -{ - struct packed_git *p = entry->p; - void *retval; - - if (use_packed_git(p)) - die("cannot map packed file"); - retval = unpack_entry_gently(p, entry->offset, type, sizep); - unuse_packed_git(p); - if (!retval) - die("corrupted pack file %s", p->pack_name); - return retval; -} - -/* The caller is responsible for use_packed_git()/unuse_packed_git() pair */ -void *unpack_entry_gently(struct packed_git *p, unsigned long offset, +void *unpack_entry(struct packed_git *p, unsigned long offset, char *type, unsigned long *sizep) { + struct pack_window *w_curs = NULL; unsigned long size, obj_offset = offset; enum object_type kind; + void *retval; - offset = unpack_object_header(p, offset, &kind, &size); + offset = unpack_object_header(p, &w_curs, offset, &kind, &size); switch (kind) { case OBJ_OFS_DELTA: case OBJ_REF_DELTA: - return unpack_delta_entry(p, offset, size, kind, obj_offset, type, sizep); + retval = unpack_delta_entry(p, &w_curs, offset, size, + kind, obj_offset, type, sizep); + 
break; case OBJ_COMMIT: case OBJ_TREE: case OBJ_BLOB: case OBJ_TAG: strcpy(type, type_names[kind]); *sizep = size; - return unpack_compressed_entry(p, offset, size); + retval = unpack_compressed_entry(p, &w_curs, offset, size); + break; default: - return NULL; + die("unknown object type %i in %s", kind, p->pack_name); } + unuse_pack(&w_curs); + return retval; } int num_packed_objects(const struct packed_git *p) @@ -1289,7 +1435,6 @@ static int sha1_loose_object_info(const unsigned char *sha1, char *type, unsigne int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep) { - int status; struct pack_entry e; if (!find_pack_entry(sha1, &e, NULL)) { @@ -1297,11 +1442,7 @@ int sha1_object_info(const unsigned char *sha1, char *type, unsigned long *sizep if (!find_pack_entry(sha1, &e, NULL)) return sha1_loose_object_info(sha1, type, sizep); } - if (use_packed_git(e.p)) - die("cannot map packed file"); - status = packed_object_info(e.p, e.offset, type, sizep); - unuse_packed_git(e.p); - return status; + return packed_object_info(e.p, e.offset, type, sizep); } static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned long *size) @@ -1312,7 +1453,7 @@ static void *read_packed_sha1(const unsigned char *sha1, char *type, unsigned lo error("cannot read sha1_file for %s", sha1_to_hex(sha1)); return NULL; } - return unpack_entry(&e, type, size); + return unpack_entry(e.p, e.offset, type, size); } void * read_sha1_file(const unsigned char *sha1, char *type, unsigned long *size) @@ -1851,10 +1992,8 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, con buf = ""; if (size) - buf = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); close(fd); - if (buf == MAP_FAILED) - return -1; if (!type) type = blob_type; diff --git a/t/t5301-sliding-window.sh b/t/t5301-sliding-window.sh new file mode 100755 index 0000000000..5a7232a577 --- /dev/null +++ 
b/t/t5301-sliding-window.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# +# Copyright (c) 2006 Shawn Pearce +# + +test_description='mmap sliding window tests' +. ./test-lib.sh + +test_expect_success \ + 'setup' \ + 'rm -f .git/index* + for i in a b c + do + echo $i >$i && + dd if=/dev/urandom bs=32k count=1 >>$i && + git-update-index --add $i || return 1 + done && + echo d >d && cat c >>d && git-update-index --add d && + tree=`git-write-tree` && + commit1=`git-commit-tree $tree </dev/null` && + git-update-ref HEAD $commit1 && + git-repack -a -d && + test "`git-count-objects`" = "0 objects, 0 kilobytes" && + pack1=`ls .git/objects/pack/*.pack` && + test -f "$pack1"' + +test_expect_success \ + 'verify-pack -v, defaults' \ + 'git-verify-pack -v "$pack1"' + +test_expect_success \ + 'verify-pack -v, packedGitWindowSize == 1 page' \ + 'git-repo-config core.packedGitWindowSize 512 && + git-verify-pack -v "$pack1"' + +test_expect_success \ + 'verify-pack -v, packedGit{WindowSize,Limit} == 1 page' \ + 'git-repo-config core.packedGitWindowSize 512 && + git-repo-config core.packedGitLimit 512 && + git-verify-pack -v "$pack1"' + +test_expect_success \ + 'repack -a -d, packedGit{WindowSize,Limit} == 1 page' \ + 'git-repo-config core.packedGitWindowSize 512 && + git-repo-config core.packedGitLimit 512 && + commit2=`git-commit-tree $tree -p $commit1 </dev/null` && + git-update-ref HEAD $commit2 && + git-repack -a -d && + test "`git-count-objects`" = "0 objects, 0 kilobytes" && + pack2=`ls .git/objects/pack/*.pack` && + test -f "$pack2" + test "$pack1" \!= "$pack2"' + +test_expect_success \ + 'verify-pack -v, defaults' \ + 'git-repo-config --unset core.packedGitWindowSize && + git-repo-config --unset core.packedGitLimit && + git-verify-pack -v "$pack2"' + +test_done diff --git a/write_or_die.c b/write_or_die.c index 650f13fc01..6db1d3123d 100644 --- a/write_or_die.c +++ b/write_or_die.c @@ -1,5 +1,21 @@ #include "cache.h" +void read_or_die(int fd, void *buf, size_t count) +{ + char *p = buf; + 
ssize_t loaded; + + while (count > 0) { + loaded = xread(fd, p, count); + if (loaded == 0) + die("unexpected end of file"); + else if (loaded < 0) + die("read error (%s)", strerror(errno)); + count -= loaded; + p += loaded; + } +} + void write_or_die(int fd, const void *buf, size_t count) { const char *p = buf; |