diff options
-rw-r--r-- | Documentation/git-pack-objects.txt | 5 | ||||
-rw-r--r-- | Documentation/git-repack.txt | 5 | ||||
-rw-r--r-- | builtin-pack-objects.c | 329 | ||||
-rw-r--r-- | csum-file.c | 8 | ||||
-rwxr-xr-x | git-repack.sh | 19 |
5 files changed, 250 insertions, 116 deletions
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 2531238df4..cfe127ad9e 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -85,6 +85,11 @@ base-name:: times to get to the necessary object. The default value for --window is 10 and --depth is 50. +--max-pack-size=<n>:: + Maximum size of each output packfile, expressed in MiB. + If specified, multiple packfiles may be created. + The default is unlimited. + --incremental:: This flag causes an object already in a pack ignored even if it appears in the standard input. diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt index cc3b0b21c7..2847c9b8d7 100644 --- a/Documentation/git-repack.txt +++ b/Documentation/git-repack.txt @@ -65,6 +65,11 @@ OPTIONS to be applied that many times to get to the necessary object. The default value for --window is 10 and --depth is 50. +--max-pack-size=<n>:: + Maximum size of each output packfile, expressed in MiB. + If specified, multiple packfiles may be created. + The default is unlimited. + Configuration ------------- diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index eca130f055..e52332df99 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -16,7 +16,7 @@ #include "progress.h" static const char pack_usage[] = "\ -git-pack-objects [{ -q | --progress | --all-progress }] \n\ +git-pack-objects [{ -q | --progress | --all-progress }] [--max-pack-size=N] \n\ [--local] [--incremental] [--window=N] [--depth=N] \n\ [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\ [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\ @@ -54,7 +54,8 @@ struct object_entry { * nice "minimum seek" order. */ static struct object_entry *objects; -static uint32_t nr_objects, nr_alloc, nr_result; +static struct object_entry **written_list; +static uint32_t nr_objects, nr_alloc, nr_result, nr_written; static int non_empty; static int no_reuse_delta, no_reuse_object; @@ -63,9 +64,11 @@ static int incremental; static int allow_ofs_delta; static const char *pack_tmp_name, *idx_tmp_name; static char tmpname[PATH_MAX]; +static const char *base_name; static unsigned char pack_file_sha1[20]; static int progress = 1; static int window = 10; +static uint32_t pack_size_limit; static int depth = 50; static int pack_to_stdout; static int num_preferred_base; @@ -351,16 +354,31 @@ static void copy_pack_data(struct sha1file *f, } static unsigned long write_object(struct sha1file *f, - struct object_entry *entry) + struct object_entry *entry, + off_t write_offset) { unsigned long size; enum object_type type; void *buf; unsigned char header[10]; + unsigned char dheader[10]; unsigned hdrlen; off_t datalen; enum object_type obj_type; int to_reuse = 0; + /* write limit if limited packsize and not first object */ + unsigned long limit = pack_size_limit && nr_written ? + pack_size_limit - write_offset : 0; + /* no if no delta */ + int usable_delta = !entry->delta ? 0 : + /* yes if unlimited packfile */ + !pack_size_limit ? 1 : + /* no if base written to previous pack */ + entry->delta->offset == (off_t)-1 ? 0 : + /* otherwise double-check written to this + * pack, like we do below + */ + entry->delta->offset ? 1 : 0; if (!pack_to_stdout) crc32_begin(f); @@ -371,7 +389,9 @@ static unsigned long write_object(struct sha1file *f, else if (!entry->in_pack) to_reuse = 0; /* can't reuse what we don't have */ else if (obj_type == OBJ_REF_DELTA || obj_type == OBJ_OFS_DELTA) - to_reuse = 1; /* check_object() decided it for us */ + /* check_object() decided it for us ... */ + to_reuse = usable_delta; + /* ... but pack split may override that */ else if (obj_type != entry->in_pack_type) to_reuse = 0; /* pack has delta which is unusable */ else if (entry->delta) @@ -382,24 +402,48 @@ static unsigned long write_object(struct sha1file *f, */ if (!to_reuse) { + z_stream stream; + unsigned long maxsize; + void *out; buf = read_sha1_file(entry->sha1, &type, &size); if (!buf) die("unable to read %s", sha1_to_hex(entry->sha1)); if (size != entry->size) die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); - if (entry->delta) { + if (usable_delta) { buf = delta_against(buf, size, entry); size = entry->delta_size; obj_type = (allow_ofs_delta && entry->delta->offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; + } else { + /* + * recover real object type in case + * check_object() wanted to re-use a delta, + * but we couldn't since base was in previous split pack + */ + obj_type = type; } + /* compress the data to store and put compressed length in datalen */ + memset(&stream, 0, sizeof(stream)); + deflateInit(&stream, pack_compression_level); + maxsize = deflateBound(&stream, size); + out = xmalloc(maxsize); + /* Compress it */ + stream.next_in = buf; + stream.avail_in = size; + stream.next_out = out; + stream.avail_out = maxsize; + while (deflate(&stream, Z_FINISH) == Z_OK) + /* nothing */; + deflateEnd(&stream); + datalen = stream.total_out; + deflateEnd(&stream); /* * The object header is a byte of 'type' followed by zero or * more bytes of length. */ hdrlen = encode_header(obj_type, size, header); - sha1write(f, header, hdrlen); if (obj_type == OBJ_OFS_DELTA) { /* @@ -408,21 +452,41 @@ static unsigned long write_object(struct sha1file *f, * base from this object's position in the pack. */ off_t ofs = entry->offset - entry->delta->offset; - unsigned pos = sizeof(header) - 1; - header[pos] = ofs & 127; + unsigned pos = sizeof(dheader) - 1; + dheader[pos] = ofs & 127; while (ofs >>= 7) - header[--pos] = 128 | (--ofs & 127); - sha1write(f, header + pos, sizeof(header) - pos); - hdrlen += sizeof(header) - pos; + dheader[--pos] = 128 | (--ofs & 127); + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + free(out); + free(buf); + return 0; + } + sha1write(f, header, hdrlen); + sha1write(f, dheader + pos, sizeof(dheader) - pos); + hdrlen += sizeof(dheader) - pos; } else if (obj_type == OBJ_REF_DELTA) { /* * Deltas with a base reference contain * an additional 20 bytes for the base sha1. */ + if (limit && hdrlen + 20 + datalen + 20 >= limit) { + free(out); + free(buf); + return 0; + } + sha1write(f, header, hdrlen); sha1write(f, entry->delta->sha1, 20); hdrlen += 20; + } else { + if (limit && hdrlen + datalen + 20 >= limit) { + free(out); + free(buf); + return 0; + } + sha1write(f, header, hdrlen); } - datalen = sha1write_compressed(f, buf, size, pack_compression_level); + sha1write(f, out, datalen); + free(out); free(buf); } else { @@ -437,20 +501,6 @@ static unsigned long write_object(struct sha1file *f, reused_delta++; } hdrlen = encode_header(obj_type, entry->size, header); - sha1write(f, header, hdrlen); - if (obj_type == OBJ_OFS_DELTA) { - off_t ofs = entry->offset - entry->delta->offset; - unsigned pos = sizeof(header) - 1; - header[pos] = ofs & 127; - while (ofs >>= 7) - header[--pos] = 128 | (--ofs & 127); - sha1write(f, header + pos, sizeof(header) - pos); - hdrlen += sizeof(header) - pos; - } else if (obj_type == OBJ_REF_DELTA) { - sha1write(f, entry->delta->sha1, 20); - hdrlen += 20; - } - offset = entry->in_pack_offset; revidx = find_packed_object(p, offset); datalen = revidx[1].offset - offset; @@ -459,6 +509,29 @@ static unsigned long write_object(struct sha1file *f, die("bad packed object CRC for %s", sha1_to_hex(entry->sha1)); offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; + if (obj_type == OBJ_OFS_DELTA) { + off_t ofs = entry->offset - entry->delta->offset; + unsigned pos = sizeof(dheader) - 1; + dheader[pos] = ofs & 127; + while (ofs >>= 7) + dheader[--pos] = 128 | (--ofs & 127); + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) + return 0; + sha1write(f, header, hdrlen); + sha1write(f, dheader + pos, sizeof(dheader) - pos); + hdrlen += sizeof(dheader) - pos; + } else if (obj_type == OBJ_REF_DELTA) { + if (limit && hdrlen + 20 + datalen + 20 >= limit) + return 0; + sha1write(f, header, hdrlen); + sha1write(f, entry->delta->sha1, 20); + hdrlen += 20; + } else { + if (limit && hdrlen + datalen + 20 >= limit) + return 0; + sha1write(f, header, hdrlen); + } + if (!pack_to_stdout && p->index_version == 1 && check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) die("corrupt packed object for %s", sha1_to_hex(entry->sha1)); @@ -466,7 +539,7 @@ static unsigned long write_object(struct sha1file *f, unuse_pack(&w_curs); reused++; } - if (entry->delta) + if (usable_delta) written_delta++; written++; if (!pack_to_stdout) @@ -485,11 +558,19 @@ static off_t write_one(struct sha1file *f, return offset; /* if we are deltified, write out base object first. */ - if (e->delta) + if (e->delta) { offset = write_one(f, e->delta, offset); + if (!offset) + return 0; + } e->offset = offset; - size = write_object(f, e); + size = write_object(f, e, offset); + if (!size) { + e->offset = 0; + return 0; + } + written_list[nr_written++] = e; /* make sure off_t is sufficiently large not to wrap */ if (offset > offset + size) @@ -503,49 +584,113 @@ static int open_object_dir_tmp(const char *path) return mkstemp(tmpname); } -static off_t write_pack_file(void) +/* forward declarations for write_pack_file */ +static void write_index_file(off_t last_obj_offset, unsigned char *sha1); +static int adjust_perm(const char *path, mode_t mode); + +static void write_pack_file(void) { - uint32_t i; + uint32_t i = 0, j; struct sha1file *f; - off_t offset, last_obj_offset = 0; + off_t offset, offset_one, last_obj_offset = 0; struct pack_header hdr; - int do_progress = progress; - - if (pack_to_stdout) { - f = sha1fd(1, "<stdout>"); - do_progress >>= 1; - } else { - int fd = open_object_dir_tmp("tmp_pack_XXXXXX"); - if (fd < 0) - die("unable to create %s: %s\n", tmpname, strerror(errno)); - pack_tmp_name = xstrdup(tmpname); - f = sha1fd(fd, pack_tmp_name); - } + int do_progress = progress >> pack_to_stdout; + uint32_t nr_remaining = nr_result; if (do_progress) start_progress(&progress_state, "Writing %u objects...", "", nr_result); + written_list = xmalloc(nr_objects * sizeof(struct object_entry *)); - hdr.hdr_signature = htonl(PACK_SIGNATURE); - hdr.hdr_version = htonl(PACK_VERSION); - hdr.hdr_entries = htonl(nr_result); - sha1write(f, &hdr, sizeof(hdr)); - offset = sizeof(hdr); - if (!nr_result) - goto done; - for (i = 0; i < nr_objects; i++) { - last_obj_offset = offset; - offset = write_one(f, objects + i, offset); - if (do_progress) - display_progress(&progress_state, written); - } + do { + if (pack_to_stdout) { + f = sha1fd(1, "<stdout>"); + } else { + int fd = open_object_dir_tmp("tmp_pack_XXXXXX"); + if (fd < 0) + die("unable to create %s: %s\n", tmpname, strerror(errno)); + pack_tmp_name = xstrdup(tmpname); + f = sha1fd(fd, pack_tmp_name); + } + + hdr.hdr_signature = htonl(PACK_SIGNATURE); + hdr.hdr_version = htonl(PACK_VERSION); + hdr.hdr_entries = htonl(nr_remaining); + sha1write(f, &hdr, sizeof(hdr)); + offset = sizeof(hdr); + nr_written = 0; + for (; i < nr_objects; i++) { + last_obj_offset = offset; + offset_one = write_one(f, objects + i, offset); + if (!offset_one) + break; + offset = offset_one; + if (do_progress) + display_progress(&progress_state, written); + } + + /* + * Did we write the wrong # entries in the header? + * If so, rewrite it like in fast-import + */ + if (pack_to_stdout || nr_written == nr_remaining) { + sha1close(f, pack_file_sha1, 1); + } else { + sha1close(f, pack_file_sha1, 0); + fixup_pack_header_footer(f->fd, pack_file_sha1, pack_tmp_name, nr_written); + close(f->fd); + } + + if (!pack_to_stdout) { + unsigned char object_list_sha1[20]; + mode_t mode = umask(0); + + umask(mode); + mode = 0444 & ~mode; + + write_index_file(last_obj_offset, object_list_sha1); + snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", + base_name, sha1_to_hex(object_list_sha1)); + if (adjust_perm(pack_tmp_name, mode)) + die("unable to make temporary pack file readable: %s", + strerror(errno)); + if (rename(pack_tmp_name, tmpname)) + die("unable to rename temporary pack file: %s", + strerror(errno)); + snprintf(tmpname, sizeof(tmpname), "%s-%s.idx", + base_name, sha1_to_hex(object_list_sha1)); + if (adjust_perm(idx_tmp_name, mode)) + die("unable to make temporary index file readable: %s", + strerror(errno)); + if (rename(idx_tmp_name, tmpname)) + die("unable to rename temporary index file: %s", + strerror(errno)); + puts(sha1_to_hex(object_list_sha1)); + } + + /* mark written objects as written to previous pack */ + for (j = 0; j < nr_written; j++) { + written_list[j]->offset = (off_t)-1; + } + nr_remaining -= nr_written; + } while (nr_remaining && i < nr_objects); + + free(written_list); if (do_progress) stop_progress(&progress_state); - done: if (written != nr_result) die("wrote %u objects while expecting %u", written, nr_result); - sha1close(f, pack_file_sha1, 1); - - return last_obj_offset; + /* + * We have scanned through [0 ... i). Since we have written + * the correct number of objects, the remaining [i ... nr_objects) + * items must be either already written (due to out-of-order delta base) + * or a preferred base. Count those which are neither and complain if any. + */ + for (j = 0; i < nr_objects; i++) { + struct object_entry *e = objects + i; + j += !e->offset && !e->preferred_base; + } + if (j) + die("wrote %u objects as expected but %u unwritten", written, j); } static int sha1_sort(const void *_a, const void *_b) @@ -572,18 +717,11 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1) idx_tmp_name = xstrdup(tmpname); f = sha1fd(fd, idx_tmp_name); - if (nr_result) { - uint32_t j = 0; - sorted_by_sha = - xcalloc(nr_result, sizeof(struct object_entry *)); - for (i = 0; i < nr_objects; i++) - if (!objects[i].preferred_base) - sorted_by_sha[j++] = objects + i; - if (j != nr_result) - die("listed %u objects while expecting %u", j, nr_result); - qsort(sorted_by_sha, nr_result, sizeof(*sorted_by_sha), sha1_sort); + if (nr_written) { + sorted_by_sha = written_list; + qsort(sorted_by_sha, nr_written, sizeof(*sorted_by_sha), sha1_sort); list = sorted_by_sha; - last = sorted_by_sha + nr_result; + last = sorted_by_sha + nr_written; } else sorted_by_sha = list = last = NULL; @@ -621,7 +759,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1) /* Write the actual SHA1 entries. */ list = sorted_by_sha; - for (i = 0; i < nr_result; i++) { + for (i = 0; i < nr_written; i++) { struct object_entry *entry = *list++; if (index_version < 2) { uint32_t offset = htonl(entry->offset); @@ -636,7 +774,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1) /* write the crc32 table */ list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_written; i++) { struct object_entry *entry = *list++; uint32_t crc32_val = htonl(entry->crc32); sha1write(f, &crc32_val, 4); @@ -644,7 +782,7 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1) /* write the 32-bit offset table */ list = sorted_by_sha; - for (i = 0; i < nr_objects; i++) { + for (i = 0; i < nr_written; i++) { struct object_entry *entry = *list++; uint32_t offset = (entry->offset <= index_off32_limit) ? entry->offset : (0x80000000 | nr_large_offset++); @@ -669,7 +807,6 @@ static void write_index_file(off_t last_obj_offset, unsigned char *sha1) sha1write(f, pack_file_sha1, 20); sha1close(f, NULL, 1); - free(sorted_by_sha); SHA1_Final(sha1, &ctx); } @@ -1569,8 +1706,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int use_internal_rev_list = 0; int thin = 0; uint32_t i; - off_t last_obj_offset; - const char *base_name = NULL; const char **rp_av; int rp_ac_alloc = 64; int rp_ac; @@ -1616,6 +1751,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) pack_compression_level = level; continue; } + if (!prefixcmp(arg, "--max-pack-size=")) { + char *end; + pack_size_limit = strtoul(arg+16, &end, 0) * 1024 * 1024; + if (!arg[16] || *end) + usage(pack_usage); + continue; + } if (!prefixcmp(arg, "--window=")) { char *end; window = strtoul(arg+9, &end, 0); @@ -1714,6 +1856,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (pack_to_stdout != !base_name) usage(pack_usage); + if (pack_to_stdout && pack_size_limit) + die("--max-pack-size cannot be used to build a pack for transfer."); + if (!pack_to_stdout && thin) die("--thin cannot be used to build an indexable pack."); @@ -1739,33 +1884,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) fprintf(stderr, "Result has %u objects.\n", nr_result); if (nr_result) prepare_pack(window, depth); - last_obj_offset = write_pack_file(); - if (!pack_to_stdout) { - unsigned char object_list_sha1[20]; - mode_t mode = umask(0); - - umask(mode); - mode = 0444 & ~mode; - - write_index_file(last_obj_offset, object_list_sha1); - snprintf(tmpname, sizeof(tmpname), "%s-%s.pack", - base_name, sha1_to_hex(object_list_sha1)); - if (adjust_perm(pack_tmp_name, mode)) - die("unable to make temporary pack file readable: %s", - strerror(errno)); - if (rename(pack_tmp_name, tmpname)) - die("unable to rename temporary pack file: %s", - strerror(errno)); - snprintf(tmpname, sizeof(tmpname), "%s-%s.idx", - base_name, sha1_to_hex(object_list_sha1)); - if (adjust_perm(idx_tmp_name, mode)) - die("unable to make temporary index file readable: %s", - strerror(errno)); - if (rename(idx_tmp_name, tmpname)) - die("unable to rename temporary index file: %s", - strerror(errno)); - puts(sha1_to_hex(object_list_sha1)); - } + write_pack_file(); if (progress) fprintf(stderr, "Total %u (delta %u), reused %u (delta %u)\n", written, written_delta, reused, reused_delta); diff --git a/csum-file.c b/csum-file.c index 7088f6e93f..5109342624 100644 --- a/csum-file.c +++ b/csum-file.c @@ -29,18 +29,20 @@ static void sha1flush(struct sha1file *f, unsigned int count) } } -int sha1close(struct sha1file *f, unsigned char *result, int update) +int sha1close(struct sha1file *f, unsigned char *result, int final) { unsigned offset = f->offset; if (offset) { SHA1_Update(&f->ctx, f->buffer, offset); sha1flush(f, offset); + f->offset = 0; } + if (!final) + return 0; /* only want to flush (no checksum write, no close) */ SHA1_Final(f->buffer, &f->ctx); if (result) hashcpy(result, f->buffer); - if (update) - sha1flush(f, 20); + sha1flush(f, 20); if (close(f->fd)) die("%s: sha1 file error on close (%s)", f->name, strerror(errno)); free(f); diff --git a/git-repack.sh b/git-repack.sh index 8bf66a4fe8..8c32724be7 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -3,7 +3,7 @@ # Copyright (c) 2005 Linus Torvalds # -USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--window=N] [--depth=N]' +USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--depth=N]' SUBDIRECTORY_OK='Yes' . git-sh-setup @@ -18,6 +18,7 @@ do -q) quiet=-q ;; -f) no_reuse=--no-reuse-object ;; -l) local=--local ;; + --max-pack-size=*) extra="$extra $1" ;; --window=*) extra="$extra $1" ;; --depth=*) extra="$extra $1" ;; *) usage ;; @@ -35,7 +36,7 @@ true) esac PACKDIR="$GIT_OBJECT_DIRECTORY/pack" -PACKTMP="$GIT_DIR/.tmp-$$-pack" +PACKTMP="$GIT_OBJECT_DIRECTORY/.tmp-$$-pack" rm -f "$PACKTMP"-* trap 'rm -f "$PACKTMP"-*' 0 1 2 3 15 @@ -62,11 +63,13 @@ case ",$all_into_one," in esac args="$args $local $quiet $no_reuse$extra" -name=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") || +names=$(git-pack-objects --non-empty --all --reflog $args </dev/null "$PACKTMP") || exit 1 -if [ -z "$name" ]; then +if [ -z "$names" ]; then echo Nothing new to pack. -else +fi +for name in $names ; do + fullbases="$fullbases pack-$name" chmod a-w "$PACKTMP-$name.pack" chmod a-w "$PACKTMP-$name.idx" if test "$quiet" != '-q'; then @@ -92,7 +95,7 @@ else exit 1 } rm -f "$PACKDIR/old-pack-$name.pack" "$PACKDIR/old-pack-$name.idx" -fi +done if test "$remove_redundant" = t then @@ -103,8 +106,8 @@ then ( cd "$PACKDIR" && for e in $existing do - case "$e" in - pack-$name) ;; + case " $fullbases " in + *" $e "*) ;; *) rm -f "$e.pack" "$e.idx" "$e.keep" ;; esac done |