diff options
-rw-r--r-- | Makefile | 1 | ||||
-rw-r--r-- | builtin/pack-objects.c | 40 | ||||
-rw-r--r-- | cache.h | 8 | ||||
-rw-r--r-- | mru.c | 50 | ||||
-rw-r--r-- | mru.h | 45 | ||||
-rw-r--r-- | pack-write.c | 1 | ||||
-rw-r--r-- | sha1_file.c | 66 | ||||
-rwxr-xr-x | t/perf/p5303-many-packs.sh | 87 |
8 files changed, 248 insertions, 50 deletions
@@ -755,6 +755,7 @@ LIB_OBJS += merge.o LIB_OBJS += merge-blobs.o LIB_OBJS += merge-recursive.o LIB_OBJS += mergesort.o +LIB_OBJS += mru.o LIB_OBJS += name-hash.o LIB_OBJS += notes.o LIB_OBJS += notes-cache.o diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 92e2e5f7a8..4a63398960 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -46,6 +46,7 @@ static int keep_unreachable, unpack_unreachable, include_tag; static unsigned long unpack_unreachable_expiration; static int pack_loose_unreachable; static int local; +static int have_non_local_packs; static int incremental; static int ignore_packed_keep; static int allow_ofs_delta; @@ -978,6 +979,23 @@ static int want_object_in_pack(const unsigned char *sha1, return 1; if (incremental) return 0; + + /* + * When asked to do --local (do not include an + * object that appears in a pack we borrow + * from elsewhere) or --honor-pack-keep (do not + * include an object that appears in a pack marked + * with .keep), we need to make sure no copy of this + * object come from in _any_ pack that causes us to + * omit it, and need to complete this loop. When + * neither option is in effect, we know the object + * we just found is going to be packed, so break + * out of the loop to return 1 now. + */ + if (!ignore_packed_keep && + (!local || !have_non_local_packs)) + break; + if (local && !p->pack_local) return 0; if (ignore_packed_keep && p->pack_local && p->pack_keep) @@ -2784,6 +2802,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) progress = 2; prepare_packed_git(); + if (ignore_packed_keep) { + struct packed_git *p; + for (p = packed_git; p; p = p->next) + if (p->pack_local && p->pack_keep) + break; + if (!p) /* no keep-able packs found */ + ignore_packed_keep = 0; + } + if (local) { + /* + * unlike ignore_packed_keep above, we do not want to + * unset "local" based on looking at packs, as it + * also covers non-local objects + */ + struct packed_git *p; + for (p = packed_git; p; p = p->next) { + if (!p->pack_local) { + have_non_local_packs = 1; + break; + } + } + } if (progress) progress_state = start_progress(_("Counting objects"), 0); @@ -1378,6 +1378,13 @@ extern struct packed_git { char pack_name[FLEX_ARRAY]; /* more */ } *packed_git; +/* + * A most-recently-used ordered version of the packed_git list, which can + * be iterated instead of packed_git (and marked via mru_mark). + */ +struct mru; +extern struct mru *packed_git_mru; + struct pack_entry { off_t offset; unsigned char sha1[20]; @@ -1417,7 +1424,6 @@ extern unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t extern void close_pack_windows(struct packed_git *); extern void close_all_packs(void); extern void unuse_pack(struct pack_window **); -extern void free_pack_by_name(const char *); extern void clear_delta_base_cache(void); extern struct packed_git *add_packed_git(const char *path, size_t path_len, int local); @@ -0,0 +1,50 @@ +#include "cache.h" +#include "mru.h" + +void mru_append(struct mru *mru, void *item) +{ + struct mru_entry *cur = xmalloc(sizeof(*cur)); + cur->item = item; + cur->prev = mru->tail; + cur->next = NULL; + + if (mru->tail) + mru->tail->next = cur; + else + mru->head = cur; + mru->tail = cur; +} + +void mru_mark(struct mru *mru, struct mru_entry *entry) +{ + /* If we're already at the front of the list, nothing to do */ + if (mru->head == entry) + return; + + /* Otherwise, remove us from our current slot... */ + if (entry->prev) + entry->prev->next = entry->next; + if (entry->next) + entry->next->prev = entry->prev; + else + mru->tail = entry->prev; + + /* And insert us at the beginning. */ + entry->prev = NULL; + entry->next = mru->head; + if (mru->head) + mru->head->prev = entry; + mru->head = entry; +} + +void mru_clear(struct mru *mru) +{ + struct mru_entry *p = mru->head; + + while (p) { + struct mru_entry *to_free = p; + p = p->next; + free(to_free); + } + mru->head = mru->tail = NULL; +} @@ -0,0 +1,45 @@ +#ifndef MRU_H +#define MRU_H + +/** + * A simple most-recently-used cache, backed by a doubly-linked list. + * + * Usage is roughly: + * + * // Create a list. Zero-initialization is required. + * static struct mru cache; + * mru_append(&cache, item); + * ... + * + * // Iterate in MRU order. + * struct mru_entry *p; + * for (p = cache.head; p; p = p->next) { + * if (matches(p->item)) + * break; + * } + * + * // Mark an item as used, moving it to the front of the list. + * mru_mark(&cache, p); + * + * // Reset the list to empty, cleaning up all resources. + * mru_clear(&cache); + * + * Note that you SHOULD NOT call mru_mark() and then continue traversing the + * list; it reorders the marked item to the front of the list, and therefore + * you will begin traversing the whole list again. + */ + +struct mru_entry { + void *item; + struct mru_entry *prev, *next; +}; + +struct mru { + struct mru_entry *head, *tail; +}; + +void mru_append(struct mru *mru, void *item); +void mru_mark(struct mru *mru, struct mru_entry *entry); +void mru_clear(struct mru *mru); + +#endif /* MRU_H */ diff --git a/pack-write.c b/pack-write.c index 33293ce2a6..ea0b788130 100644 --- a/pack-write.c +++ b/pack-write.c @@ -354,7 +354,6 @@ void finish_tmp_packfile(struct strbuf *name_buffer, die_errno("unable to make temporary index file readable"); strbuf_addf(name_buffer, "%s.pack", sha1_to_hex(sha1)); - free_pack_by_name(name_buffer->buf); if (rename(pack_tmp_name, name_buffer->buf)) die_errno("unable to rename temporary pack file"); diff --git a/sha1_file.c b/sha1_file.c index cb571ac6e8..3066b5f71c 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -23,6 +23,7 @@ #include "bulk-checkin.h" #include "streaming.h" #include "dir.h" +#include "mru.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -59,14 +60,6 @@ static struct cached_object empty_tree = { 0 }; -/* - * A pointer to the last packed_git in which an object was found. - * When an object is sought, we look in this packfile first, because - * objects that are looked up at similar times are often in the same - * packfile as one another. - */ -static struct packed_git *last_found_pack; - static struct cached_object *find_cached_object(const unsigned char *sha1) { int i; @@ -522,6 +515,9 @@ static size_t peak_pack_mapped; static size_t pack_mapped; struct packed_git *packed_git; +static struct mru packed_git_mru_storage; +struct mru *packed_git_mru = &packed_git_mru_storage; + void pack_report(void) { fprintf(stderr, @@ -891,36 +887,6 @@ void close_pack_index(struct packed_git *p) } } -/* - * This is used by git-repack in case a newly created pack happens to - * contain the same set of objects as an existing one. In that case - * the resulting file might be different even if its name would be the - * same. It is best to close any reference to the old pack before it is - * replaced on disk. Of course no index pointers or windows for given pack - * must subsist at this point. If ever objects from this pack are requested - * again, the new version of the pack will be reinitialized through - * reprepare_packed_git(). - */ -void free_pack_by_name(const char *pack_name) -{ - struct packed_git *p, **pp = &packed_git; - - while (*pp) { - p = *pp; - if (strcmp(pack_name, p->pack_name) == 0) { - clear_delta_base_cache(); - close_pack(p); - free(p->bad_object_sha1); - *pp = p->next; - if (last_found_pack == p) - last_found_pack = NULL; - free(p); - return; - } - pp = &p->next; - } -} - static unsigned int get_max_fd_limit(void) { #ifdef RLIMIT_NOFILE @@ -1385,6 +1351,15 @@ static void rearrange_packed_git(void) free(ary); } +static void prepare_packed_git_mru(void) +{ + struct packed_git *p; + + mru_clear(packed_git_mru); + for (p = packed_git; p; p = p->next) + mru_append(packed_git_mru, p); +} + static int prepare_packed_git_run_once = 0; void prepare_packed_git(void) { @@ -1400,6 +1375,7 @@ void prepare_packed_git(void) alt->name[-1] = '/'; } rearrange_packed_git(); + prepare_packed_git_mru(); prepare_packed_git_run_once = 1; } @@ -2604,21 +2580,15 @@ static int fill_pack_entry(const unsigned char *sha1, */ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) { - struct packed_git *p; + struct mru_entry *p; prepare_packed_git(); if (!packed_git) return 0; - if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) - return 1; - - for (p = packed_git; p; p = p->next) { - if (p == last_found_pack) - continue; /* we already checked this one */ - - if (fill_pack_entry(sha1, e, p)) { - last_found_pack = p; + for (p = packed_git_mru->head; p; p = p->next) { + if (fill_pack_entry(sha1, e, p->item)) { + mru_mark(packed_git_mru, p); return 1; } } diff --git a/t/perf/p5303-many-packs.sh b/t/perf/p5303-many-packs.sh new file mode 100755 index 0000000000..3779851941 --- /dev/null +++ b/t/perf/p5303-many-packs.sh @@ -0,0 +1,87 @@ +#!/bin/sh + +test_description='performance with large numbers of packs' +. ./perf-lib.sh + +test_perf_large_repo + +# A real many-pack situation would probably come from having a lot of pushes +# over time. We don't know how big each push would be, but we can fake it by +# just walking the first-parent chain and having every 5 commits be their own +# "push". This isn't _entirely_ accurate, as real pushes would have some +# duplicate objects due to thin-pack fixing, but it's a reasonable +# approximation. +# +# And then all of the rest of the objects can go in a single packfile that +# represents the state before any of those pushes (actually, we'll generate +# that first because in such a setup it would be the oldest pack, and we sort +# the packs by reverse mtime inside git). +repack_into_n () { + rm -rf staging && + mkdir staging && + + git rev-list --first-parent HEAD | + sed -n '1~5p' | + head -n "$1" | + perl -e 'print reverse <>' \ + >pushes + + # create base packfile + head -n 1 pushes | + git pack-objects --delta-base-offset --revs staging/pack + + # and then incrementals between each pair of commits + last= && + while read rev + do + if test -n "$last"; then + { + echo "$rev" && + echo "^$last" + } | + git pack-objects --delta-base-offset --revs \ + staging/pack || return 1 + fi + last=$rev + done <pushes && + + # and install the whole thing + rm -f .git/objects/pack/* && + mv staging/* .git/objects/pack/ +} + +# Pretend we just have a single branch and no reflogs, and that everything is +# in objects/pack; that makes our fake pack-building via repack_into_n() +# much simpler. +test_expect_success 'simplify reachability' ' + tip=$(git rev-parse --verify HEAD) && + git for-each-ref --format="option no-deref%0adelete %(refname)" | + git update-ref --stdin && + rm -rf .git/logs && + git update-ref refs/heads/master $tip && + git symbolic-ref HEAD refs/heads/master && + git repack -ad +' + +for nr_packs in 1 50 1000 +do + test_expect_success "create $nr_packs-pack scenario" ' + repack_into_n $nr_packs + ' + + test_perf "rev-list ($nr_packs)" ' + git rev-list --objects --all >/dev/null + ' + + # This simulates the interesting part of the repack, which is the + # actual pack generation, without smudging the on-disk setup + # between trials. + test_perf "repack ($nr_packs)" ' + git pack-objects --keep-true-parents \ + --honor-pack-keep --non-empty --all \ + --reflog --indexed-objects --delta-base-offset \ + --stdout </dev/null >/dev/null + ' +done + +test_done |