diff options
author | Junio C Hamano <gitster@pobox.com> | 2014-10-29 10:07:56 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2014-10-29 10:07:56 -0700 |
commit | d70e331c0e8eaeb0bd75ae3020c3be71de075ff7 (patch) | |
tree | 645e25685926321704394063059a928d7492a9e8 /builtin | |
parent | Merge branch 'bc/asciidoctor' (diff) | |
parent | drop add_object_array_with_mode (diff) | |
download | tgif-d70e331c0e8eaeb0bd75ae3020c3be71de075ff7.tar.xz |
Merge branch 'jk/prune-mtime'
Tighten the logic to decide that an unreachable cruft is
sufficiently old by covering corner cases such as an ancient object
becoming reachable and then going unreachable again, in which case
its retention period should be prolonged.
* jk/prune-mtime: (28 commits)
drop add_object_array_with_mode
revision: remove definition of unused 'add_object' function
pack-objects: double-check options before discarding objects
repack: pack objects mentioned by the index
pack-objects: use argv_array
reachable: use revision machinery's --indexed-objects code
rev-list: add --indexed-objects option
rev-list: document --reflog option
t5516: test pushing a tag of an otherwise unreferenced blob
traverse_commit_list: support pending blobs/trees with paths
make add_object_array_with_context interface more sane
write_sha1_file: freshen existing objects
pack-objects: match prune logic for discarding objects
pack-objects: refactor unpack-unreachable expiration check
prune: keep objects reachable from recent objects
sha1_file: add for_each iterators for loose and packed objects
count-objects: use for_each_loose_file_in_objdir
count-objects: do not use xsize_t when counting object size
prune-packed: use for_each_loose_file_in_objdir
reachable: mark index blobs as SEEN
...
Diffstat (limited to 'builtin')
-rw-r--r-- | builtin/count-objects.c | 101 | ||||
-rw-r--r-- | builtin/grep.c | 8 | ||||
-rw-r--r-- | builtin/pack-objects.c | 86 | ||||
-rw-r--r-- | builtin/prune-packed.c | 69 | ||||
-rw-r--r-- | builtin/prune.c | 89 | ||||
-rw-r--r-- | builtin/reflog.c | 2 | ||||
-rw-r--r-- | builtin/repack.c | 1 |
7 files changed, 157 insertions, 199 deletions
diff --git a/builtin/count-objects.c b/builtin/count-objects.c index a7f70cb858..e47ef0b1af 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -11,6 +11,9 @@ static unsigned long garbage; static off_t size_garbage; +static int verbose; +static unsigned long loose, packed, packed_loose; +static off_t loose_size; static void real_report_garbage(const char *desc, const char *path) { @@ -21,61 +24,31 @@ static void real_report_garbage(const char *desc, const char *path) garbage++; } -static void count_objects(DIR *d, char *path, int len, int verbose, - unsigned long *loose, - off_t *loose_size, - unsigned long *packed_loose) +static void loose_garbage(const char *path) { - struct dirent *ent; - while ((ent = readdir(d)) != NULL) { - char hex[41]; - unsigned char sha1[20]; - const char *cp; - int bad = 0; + if (verbose) + report_garbage("garbage found", path); +} - if (is_dot_or_dotdot(ent->d_name)) - continue; - for (cp = ent->d_name; *cp; cp++) { - int ch = *cp; - if (('0' <= ch && ch <= '9') || - ('a' <= ch && ch <= 'f')) - continue; - bad = 1; - break; - } - if (cp - ent->d_name != 38) - bad = 1; - else { - struct stat st; - memcpy(path + len + 3, ent->d_name, 38); - path[len + 2] = '/'; - path[len + 41] = 0; - if (lstat(path, &st) || !S_ISREG(st.st_mode)) - bad = 1; - else - (*loose_size) += xsize_t(on_disk_bytes(st)); - } - if (bad) { - if (verbose) { - struct strbuf sb = STRBUF_INIT; - strbuf_addf(&sb, "%.*s/%s", - len + 2, path, ent->d_name); - report_garbage("garbage found", sb.buf); - strbuf_release(&sb); - } - continue; - } - (*loose)++; - if (!verbose) - continue; - memcpy(hex, path+len, 2); - memcpy(hex+2, ent->d_name, 38); - hex[40] = 0; - if (get_sha1_hex(hex, sha1)) - die("internal error"); - if (has_sha1_pack(sha1)) - (*packed_loose)++; +static int count_loose(const unsigned char *sha1, const char *path, void *data) +{ + struct stat st; + + if (lstat(path, &st) || !S_ISREG(st.st_mode)) + loose_garbage(path); + else { + loose_size += on_disk_bytes(st); + loose++; + if (verbose && has_sha1_pack(sha1)) + packed_loose++; } + return 0; +} + +static int count_cruft(const char *basename, const char *path, void *data) +{ + loose_garbage(path); + return 0; } static char const * const count_objects_usage[] = { @@ -85,12 +58,7 @@ static char const * const count_objects_usage[] = { int cmd_count_objects(int argc, const char **argv, const char *prefix) { - int i, verbose = 0, human_readable = 0; - const char *objdir = get_object_directory(); - int len = strlen(objdir); - char *path = xmalloc(len + 50); - unsigned long loose = 0, packed = 0, packed_loose = 0; - off_t loose_size = 0; + int human_readable = 0; struct option opts[] = { OPT__VERBOSE(&verbose, N_("be verbose")), OPT_BOOL('H', "human-readable", &human_readable, @@ -104,19 +72,10 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) usage_with_options(count_objects_usage, opts); if (verbose) report_garbage = real_report_garbage; - memcpy(path, objdir, len); - if (len && objdir[len-1] != '/') - path[len++] = '/'; - for (i = 0; i < 256; i++) { - DIR *d; - sprintf(path + len, "%02x", i); - d = opendir(path); - if (!d) - continue; - count_objects(d, path, len, verbose, - &loose, &loose_size, &packed_loose); - closedir(d); - } + + for_each_loose_file_in_objdir(get_object_directory(), + count_loose, count_cruft, NULL, NULL); + if (verbose) { struct packed_git *p; unsigned long num_pack = 0; diff --git a/builtin/grep.c b/builtin/grep.c index c86a142f30..4063882f06 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -456,10 +456,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, } static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, - struct object *obj, const char *name, struct object_context *oc) + struct object *obj, const char *name, const char *path) { if (obj->type == OBJ_BLOB) - return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL); + return grep_sha1(opt, obj->sha1, name, 0, path); if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) { struct tree_desc tree; void *data; @@ -501,7 +501,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec, for (i = 0; i < nr; i++) { struct object *real_obj; real_obj = deref_tag(list->objects[i].item, NULL, 0); - if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) { + if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) { hit = 1; if (opt->status_only) break; @@ -821,7 +821,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) struct object *object = parse_object_or_die(sha1, arg); if (!seen_dashdash) verify_non_filename(prefix, arg); - add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context))); + add_object_array_with_path(object, arg, &list, oc.mode, oc.path); continue; } if (!strcmp(arg, "--")) { diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 78c659a6b4..3f9f5c7760 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -20,6 +20,9 @@ #include "streaming.h" #include "thread-utils.h" #include "pack-bitmap.h" +#include "reachable.h" +#include "sha1-array.h" +#include "argv-array.h" static const char *pack_usage[] = { N_("git pack-objects --stdout [options...] [< ref-list | < object-list]"), @@ -2406,6 +2409,27 @@ static int has_sha1_pack_kept_or_nonlocal(const unsigned char *sha1) return 0; } +/* + * Store a list of sha1s that are should not be discarded + * because they are either written too recently, or are + * reachable from another object that was. + * + * This is filled by get_object_list. + */ +static struct sha1_array recent_objects; + +static int loosened_object_can_be_discarded(const unsigned char *sha1, + unsigned long mtime) +{ + if (!unpack_unreachable_expiration) + return 0; + if (mtime > unpack_unreachable_expiration) + return 0; + if (sha1_array_lookup(&recent_objects, sha1) >= 0) + return 0; + return 1; +} + static void loosen_unused_packed_objects(struct rev_info *revs) { struct packed_git *p; @@ -2416,17 +2440,14 @@ static void loosen_unused_packed_objects(struct rev_info *revs) if (!p->pack_local || p->pack_keep) continue; - if (unpack_unreachable_expiration && - p->mtime < unpack_unreachable_expiration) - continue; - if (open_pack_index(p)) die("cannot open pack index"); for (i = 0; i < p->num_objects; i++) { sha1 = nth_packed_object_sha1(p, i); if (!packlist_find(&to_pack, sha1, NULL) && - !has_sha1_pack_kept_or_nonlocal(sha1)) + !has_sha1_pack_kept_or_nonlocal(sha1) && + !loosened_object_can_be_discarded(sha1, p->mtime)) if (force_object_loose(sha1, p->mtime)) die("unable to force loose object"); } @@ -2462,6 +2483,19 @@ static int get_object_list_from_bitmap(struct rev_info *revs) return 0; } +static void record_recent_object(struct object *obj, + const struct name_path *path, + const char *last, + void *data) +{ + sha1_array_append(&recent_objects, obj->sha1); +} + +static void record_recent_commit(struct commit *commit, void *data) +{ + sha1_array_append(&recent_objects, commit->object.sha1); +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -2509,10 +2543,23 @@ static void get_object_list(int ac, const char **av) mark_edges_uninteresting(&revs, show_edge); traverse_commit_list(&revs, show_commit, show_object, NULL); + if (unpack_unreachable_expiration) { + revs.ignore_missing_links = 1; + if (add_unseen_recent_objects_to_traversal(&revs, + unpack_unreachable_expiration)) + die("unable to add recent objects"); + if (prepare_revision_walk(&revs)) + die("revision walk setup failed"); + traverse_commit_list(&revs, record_recent_commit, + record_recent_object, NULL); + } + if (keep_unreachable) add_objects_in_unpacked_packs(&revs); if (unpack_unreachable) loosen_unused_packed_objects(&revs); + + sha1_array_clear(&recent_objects); } static int option_parse_index_version(const struct option *opt, @@ -2567,9 +2614,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) int use_internal_rev_list = 0; int thin = 0; int all_progress_implied = 0; - const char *rp_av[6]; - int rp_ac = 0; + struct argv_array rp = ARGV_ARRAY_INIT; int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0; + int rev_list_index = 0; struct option pack_objects_options[] = { OPT_SET_INT('q', "quiet", &progress, N_("do not show progress meter"), 0), @@ -2616,6 +2663,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) { OPTION_SET_INT, 0, "reflog", &rev_list_reflog, NULL, N_("include objects referred by reflog entries"), PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, + { OPTION_SET_INT, 0, "indexed-objects", &rev_list_index, NULL, + N_("include objects referred to by the index"), + PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, 1 }, OPT_BOOL(0, "stdout", &pack_to_stdout, N_("output pack to stdout")), OPT_BOOL(0, "include-tag", &include_tag, @@ -2658,24 +2708,28 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (pack_to_stdout != !base_name || argc) usage_with_options(pack_usage, pack_objects_options); - rp_av[rp_ac++] = "pack-objects"; + argv_array_push(&rp, "pack-objects"); if (thin) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--objects-edge"; + argv_array_push(&rp, "--objects-edge"); } else - rp_av[rp_ac++] = "--objects"; + argv_array_push(&rp, "--objects"); if (rev_list_all) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--all"; + argv_array_push(&rp, "--all"); } if (rev_list_reflog) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--reflog"; + argv_array_push(&rp, "--reflog"); + } + if (rev_list_index) { + use_internal_rev_list = 1; + argv_array_push(&rp, "--indexed-objects"); } if (rev_list_unpacked) { use_internal_rev_list = 1; - rp_av[rp_ac++] = "--unpacked"; + argv_array_push(&rp, "--unpacked"); } if (!reuse_object) @@ -2706,6 +2760,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (keep_unreachable && unpack_unreachable) die("--keep-unreachable and --unpack-unreachable are incompatible."); + if (!rev_list_all || !rev_list_reflog || !rev_list_index) + unpack_unreachable_expiration = 0; if (!use_internal_rev_list || !pack_to_stdout || is_repository_shallow()) use_bitmap_index = 0; @@ -2723,8 +2779,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!use_internal_rev_list) read_object_list_from_stdin(); else { - rp_av[rp_ac] = NULL; - get_object_list(rp_ac, rp_av); + get_object_list(rp.argc, rp.argv); + argv_array_clear(&rp); } cleanup_preferred_base(); if (include_tag && nr_result) diff --git a/builtin/prune-packed.c b/builtin/prune-packed.c index d430731d70..f24a2c2bdc 100644 --- a/builtin/prune-packed.c +++ b/builtin/prune-packed.c @@ -10,65 +10,42 @@ static const char * const prune_packed_usage[] = { static struct progress *progress; -static void prune_dir(int i, DIR *dir, struct strbuf *pathname, int opts) +static int prune_subdir(int nr, const char *path, void *data) { - struct dirent *de; - char hex[40]; - int top_len = pathname->len; + int *opts = data; + display_progress(progress, nr + 1); + if (!(*opts & PRUNE_PACKED_DRY_RUN)) + rmdir(path); + return 0; +} + +static int prune_object(const unsigned char *sha1, const char *path, + void *data) +{ + int *opts = data; - sprintf(hex, "%02x", i); - while ((de = readdir(dir)) != NULL) { - unsigned char sha1[20]; - if (strlen(de->d_name) != 38) - continue; - memcpy(hex + 2, de->d_name, 38); - if (get_sha1_hex(hex, sha1)) - continue; - if (!has_sha1_pack(sha1)) - continue; + if (!has_sha1_pack(sha1)) + return 0; - strbuf_add(pathname, de->d_name, 38); - if (opts & PRUNE_PACKED_DRY_RUN) - printf("rm -f %s\n", pathname->buf); - else - unlink_or_warn(pathname->buf); - display_progress(progress, i + 1); - strbuf_setlen(pathname, top_len); - } + if (*opts & PRUNE_PACKED_DRY_RUN) + printf("rm -f %s\n", path); + else + unlink_or_warn(path); + return 0; } void prune_packed_objects(int opts) { - int i; - const char *dir = get_object_directory(); - struct strbuf pathname = STRBUF_INIT; - int top_len; - - strbuf_addstr(&pathname, dir); if (opts & PRUNE_PACKED_VERBOSE) progress = start_progress_delay(_("Removing duplicate objects"), 256, 95, 2); - if (pathname.len && pathname.buf[pathname.len - 1] != '/') - strbuf_addch(&pathname, '/'); - - top_len = pathname.len; - for (i = 0; i < 256; i++) { - DIR *d; + for_each_loose_file_in_objdir(get_object_directory(), + prune_object, NULL, prune_subdir, &opts); - display_progress(progress, i + 1); - strbuf_setlen(&pathname, top_len); - strbuf_addf(&pathname, "%02x/", i); - d = opendir(pathname.buf); - if (!d) - continue; - prune_dir(i, d, &pathname, opts); - closedir(d); - strbuf_setlen(&pathname, top_len + 2); - rmdir(pathname.buf); - } + /* Ensure we show 100% before finishing progress */ + display_progress(progress, 256); stop_progress(&progress); - strbuf_release(&pathname); } int cmd_prune_packed(int argc, const char **argv, const char *prefix) diff --git a/builtin/prune.c b/builtin/prune.c index 144a3bdb33..04d3b12ae4 100644 --- a/builtin/prune.c +++ b/builtin/prune.c @@ -31,11 +31,23 @@ static int prune_tmp_file(const char *fullpath) return 0; } -static int prune_object(const char *fullpath, const unsigned char *sha1) +static int prune_object(const unsigned char *sha1, const char *fullpath, + void *data) { struct stat st; - if (lstat(fullpath, &st)) - return error("Could not stat '%s'", fullpath); + + /* + * Do we know about this object? + * It must have been reachable + */ + if (lookup_object(sha1)) + return 0; + + if (lstat(fullpath, &st)) { + /* report errors, but do not stop pruning */ + error("Could not stat '%s'", fullpath); + return 0; + } if (st.st_mtime > expire) return 0; if (show_only || verbose) { @@ -48,68 +60,20 @@ static int prune_object(const char *fullpath, const unsigned char *sha1) return 0; } -static int prune_dir(int i, struct strbuf *path) +static int prune_cruft(const char *basename, const char *path, void *data) { - size_t baselen = path->len; - DIR *dir = opendir(path->buf); - struct dirent *de; - - if (!dir) - return 0; - - while ((de = readdir(dir)) != NULL) { - char name[100]; - unsigned char sha1[20]; - - if (is_dot_or_dotdot(de->d_name)) - continue; - if (strlen(de->d_name) == 38) { - sprintf(name, "%02x", i); - memcpy(name+2, de->d_name, 39); - if (get_sha1_hex(name, sha1) < 0) - break; - - /* - * Do we know about this object? - * It must have been reachable - */ - if (lookup_object(sha1)) - continue; - - strbuf_addf(path, "/%s", de->d_name); - prune_object(path->buf, sha1); - strbuf_setlen(path, baselen); - continue; - } - if (starts_with(de->d_name, "tmp_obj_")) { - strbuf_addf(path, "/%s", de->d_name); - prune_tmp_file(path->buf); - strbuf_setlen(path, baselen); - continue; - } - fprintf(stderr, "bad sha1 file: %s/%s\n", path->buf, de->d_name); - } - closedir(dir); - if (!show_only) - rmdir(path->buf); + if (starts_with(basename, "tmp_obj_")) + prune_tmp_file(path); + else + fprintf(stderr, "bad sha1 file: %s\n", path); return 0; } -static void prune_object_dir(const char *path) +static int prune_subdir(int nr, const char *path, void *data) { - struct strbuf buf = STRBUF_INIT; - size_t baselen; - int i; - - strbuf_addstr(&buf, path); - strbuf_addch(&buf, '/'); - baselen = buf.len; - - for (i = 0; i < 256; i++) { - strbuf_addf(&buf, "%02x", i); - prune_dir(i, &buf); - strbuf_setlen(&buf, baselen); - } + if (!show_only) + rmdir(path); + return 0; } /* @@ -171,9 +135,10 @@ int cmd_prune(int argc, const char **argv, const char *prefix) if (show_progress) progress = start_progress_delay(_("Checking connectivity"), 0, 0, 2); - mark_reachable_objects(&revs, 1, progress); + mark_reachable_objects(&revs, 1, expire, progress); stop_progress(&progress); - prune_object_dir(get_object_directory()); + for_each_loose_file_in_objdir(get_object_directory(), prune_object, + prune_cruft, prune_subdir, NULL); prune_packed_objects(show_only ? PRUNE_PACKED_DRY_RUN : 0); remove_temporary_files(get_object_directory()); diff --git a/builtin/reflog.c b/builtin/reflog.c index b6388f75b0..2d85d260ca 100644 --- a/builtin/reflog.c +++ b/builtin/reflog.c @@ -649,7 +649,7 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix) init_revisions(&cb.revs, prefix); if (cb.verbose) printf("Marking reachable objects..."); - mark_reachable_objects(&cb.revs, 0, NULL); + mark_reachable_objects(&cb.revs, 0, 0, NULL); if (cb.verbose) putchar('\n'); } diff --git a/builtin/repack.c b/builtin/repack.c index 2aae05d364..28456206c5 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -209,6 +209,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix) argv_array_push(&cmd_args, "--non-empty"); argv_array_push(&cmd_args, "--all"); argv_array_push(&cmd_args, "--reflog"); + argv_array_push(&cmd_args, "--indexed-objects"); if (window) argv_array_pushf(&cmd_args, "--window=%s", window); if (window_memory) |