diff options
Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r-- | builtin/pack-objects.c | 202 |
1 files changed, 181 insertions, 21 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 166e52c700..c7af475485 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -23,6 +23,7 @@ #include "reachable.h" #include "sha1-array.h" #include "argv-array.h" +#include "mru.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"), @@ -60,8 +61,6 @@ static int delta_search_threads; static int pack_to_stdout; static int num_preferred_base; static struct progress *progress_state; -static int pack_compression_level = Z_DEFAULT_COMPRESSION; -static int pack_compression_seen; static struct packed_git *reuse_packfile; static uint32_t reuse_packfile_objects; @@ -719,7 +718,7 @@ static off_t write_reused_pack(struct sha1file *f) if (!is_pack_valid(reuse_packfile)) die("packfile is invalid: %s", reuse_packfile->pack_name); - fd = git_open_noatime(reuse_packfile->pack_name); + fd = git_open(reuse_packfile->pack_name); if (fd < 0) die_errno("unable to open packfile for reuse: %s", reuse_packfile->pack_name); @@ -994,7 +993,7 @@ static int want_object_in_pack(const unsigned char *sha1, struct packed_git **found_pack, off_t *found_offset) { - struct packed_git *p; + struct mru_entry *entry; int want; if (!exclude && local && has_loose_object_nonlocal(sha1)) @@ -1011,7 +1010,8 @@ static int want_object_in_pack(const unsigned char *sha1, return want; } - for (p = packed_git; p; p = p->next) { + for (entry = packed_git_mru->head; entry; entry = entry->next) { + struct packed_git *p = entry->item; off_t offset; if (p == *found_pack) @@ -1027,6 +1027,8 @@ static int want_object_in_pack(const unsigned char *sha1, *found_pack = p; } want = want_found_object(exclude, p); + if (!exclude && want > 0) + mru_mark(packed_git_mru, entry); if (want != -1) return want; } @@ -1527,6 +1529,170 @@ static int pack_offset_sort(const void *_a, const void *_b) (a->in_pack_offset > b->in_pack_offset); } +/* + * Drop an on-disk delta we were planning to reuse. Naively, this would + * just involve blanking out the "delta" field, but we have to deal + * with some extra book-keeping: + * + * 1. Removing ourselves from the delta_sibling linked list. + * + * 2. Updating our size/type to the non-delta representation. These were + * either not recorded initially (size) or overwritten with the delta type + * (type) when check_object() decided to reuse the delta. + * + * 3. Resetting our delta depth, as we are now a base object. + */ +static void drop_reused_delta(struct object_entry *entry) +{ + struct object_entry **p = &entry->delta->delta_child; + struct object_info oi = OBJECT_INFO_INIT; + + while (*p) { + if (*p == entry) + *p = (*p)->delta_sibling; + else + p = &(*p)->delta_sibling; + } + entry->delta = NULL; + entry->depth = 0; + + oi.sizep = &entry->size; + oi.typep = &entry->type; + if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) { + /* + * We failed to get the info from this pack for some reason; + * fall back to sha1_object_info, which may find another copy. + * And if that fails, the error will be recorded in entry->type + * and dealt with in prepare_pack(). + */ + entry->type = sha1_object_info(entry->idx.sha1, &entry->size); + } +} + +/* + * Follow the chain of deltas from this entry onward, throwing away any links + * that cause us to hit a cycle (as determined by the DFS state flags in + * the entries). + * + * We also detect too-long reused chains that would violate our --depth + * limit. + */ +static void break_delta_chains(struct object_entry *entry) +{ + /* + * The actual depth of each object we will write is stored as an int, + * as it cannot exceed our int "depth" limit. But before we break + * changes based no that limit, we may potentially go as deep as the + * number of objects, which is elsewhere bounded to a uint32_t. + */ + uint32_t total_depth; + struct object_entry *cur, *next; + + for (cur = entry, total_depth = 0; + cur; + cur = cur->delta, total_depth++) { + if (cur->dfs_state == DFS_DONE) { + /* + * We've already seen this object and know it isn't + * part of a cycle. We do need to append its depth + * to our count. + */ + total_depth += cur->depth; + break; + } + + /* + * We break cycles before looping, so an ACTIVE state (or any + * other cruft which made its way into the state variable) + * is a bug. + */ + if (cur->dfs_state != DFS_NONE) + die("BUG: confusing delta dfs state in first pass: %d", + cur->dfs_state); + + /* + * Now we know this is the first time we've seen the object. If + * it's not a delta, we're done traversing, but we'll mark it + * done to save time on future traversals. + */ + if (!cur->delta) { + cur->dfs_state = DFS_DONE; + break; + } + + /* + * Mark ourselves as active and see if the next step causes + * us to cycle to another active object. It's important to do + * this _before_ we loop, because it impacts where we make the + * cut, and thus how our total_depth counter works. + * E.g., We may see a partial loop like: + * + * A -> B -> C -> D -> B + * + * Cutting B->C breaks the cycle. But now the depth of A is + * only 1, and our total_depth counter is at 3. The size of the + * error is always one less than the size of the cycle we + * broke. Commits C and D were "lost" from A's chain. + * + * If we instead cut D->B, then the depth of A is correct at 3. + * We keep all commits in the chain that we examined. + */ + cur->dfs_state = DFS_ACTIVE; + if (cur->delta->dfs_state == DFS_ACTIVE) { + drop_reused_delta(cur); + cur->dfs_state = DFS_DONE; + break; + } + } + + /* + * And now that we've gone all the way to the bottom of the chain, we + * need to clear the active flags and set the depth fields as + * appropriate. Unlike the loop above, which can quit when it drops a + * delta, we need to keep going to look for more depth cuts. So we need + * an extra "next" pointer to keep going after we reset cur->delta. + */ + for (cur = entry; cur; cur = next) { + next = cur->delta; + + /* + * We should have a chain of zero or more ACTIVE states down to + * a final DONE. We can quit after the DONE, because either it + * has no bases, or we've already handled them in a previous + * call. + */ + if (cur->dfs_state == DFS_DONE) + break; + else if (cur->dfs_state != DFS_ACTIVE) + die("BUG: confusing delta dfs state in second pass: %d", + cur->dfs_state); + + /* + * If the total_depth is more than depth, then we need to snip + * the chain into two or more smaller chains that don't exceed + * the maximum depth. Most of the resulting chains will contain + * (depth + 1) entries (i.e., depth deltas plus one base), and + * the last chain (i.e., the one containing entry) will contain + * whatever entries are left over, namely + * (total_depth % (depth + 1)) of them. + * + * Since we are iterating towards decreasing depth, we need to + * decrement total_depth as we go, and we need to write to the + * entry what its final depth will be after all of the + * snipping. Since we're snipping into chains of length (depth + * + 1) entries, the final depth of an entry will be its + * original depth modulo (depth + 1). Any time we encounter an + * entry whose final depth is supposed to be zero, we snip it + * from its delta base, thereby making it so. + */ + cur->depth = (total_depth--) % (depth + 1); + if (!cur->depth) + drop_reused_delta(cur); + + cur->dfs_state = DFS_DONE; + } +} + static void get_object_details(void) { uint32_t i; @@ -1535,7 +1701,7 @@ static void get_object_details(void) sorted_by_offset = xcalloc(to_pack.nr_objects, sizeof(struct object_entry *)); for (i = 0; i < to_pack.nr_objects; i++) sorted_by_offset[i] = to_pack.objects + i; - qsort(sorted_by_offset, to_pack.nr_objects, sizeof(*sorted_by_offset), pack_offset_sort); + QSORT(sorted_by_offset, to_pack.nr_objects, pack_offset_sort); for (i = 0; i < to_pack.nr_objects; i++) { struct object_entry *entry = sorted_by_offset[i]; @@ -1544,6 +1710,13 @@ static void get_object_details(void) entry->no_try_delta = 1; } + /* + * This must happen in a second pass, since we rely on the delta + * information for the whole list being completed. + */ + for (i = 0; i < to_pack.nr_objects; i++) + break_delta_chains(&to_pack.objects[i]); + free(sorted_by_offset); } @@ -2257,7 +2430,7 @@ static void prepare_pack(int window, int depth) if (progress) progress_state = start_progress(_("Compressing objects"), nr_deltas); - qsort(delta_list, n, sizeof(*delta_list), type_size_sort); + QSORT(delta_list, n, type_size_sort); ll_find_deltas(delta_list, n, window+1, depth, &nr_done); stop_progress(&progress_state); if (nr_done != nr_deltas) @@ -2280,16 +2453,6 @@ static int git_pack_config(const char *k, const char *v, void *cb) depth = git_config_int(k, v); return 0; } - if (!strcmp(k, "pack.compression")) { - int level = git_config_int(k, v); - if (level == -1) - level = Z_DEFAULT_COMPRESSION; - else if (level < 0 || level > Z_BEST_COMPRESSION) - die("bad pack compression level %d", level); - pack_compression_level = level; - pack_compression_seen = 1; - return 0; - } if (!strcmp(k, "pack.deltacachesize")) { max_delta_cache_size = git_config_int(k, v); return 0; @@ -2449,8 +2612,7 @@ static void add_objects_in_unpacked_packs(struct rev_info *revs) } if (in_pack.nr) { - qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]), - ofscmp); + QSORT(in_pack.array, in_pack.nr, ofscmp); for (i = 0; i < in_pack.nr; i++) { struct object *o = in_pack.array[i].object; add_object_entry(o->oid.hash, o->type, "", 0); @@ -2782,8 +2944,6 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) reset_pack_idx_option(&pack_idx_opts); git_config(git_pack_config, NULL); - if (!pack_compression_seen && core_compression_seen) - pack_compression_level = core_compression_level; progress = isatty(2); argc = parse_options(argc, argv, prefix, pack_objects_options, |