From 2c3c4399477533329579ca6b84824ef0b125914f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 5 Sep 2007 13:01:37 -0700 Subject: Implement git gc --auto This implements a new option "git gc --auto". When gc.auto is set to a positive value, and the object database has accumulated roughly that many loose objects, this runs a lightweight version of "git gc". The primary difference from the full "git gc" is that it does not pass "-a" option to "git repack", which means we do not try to repack _everything_, but only repack incrementally. We still do "git prune-packed". The default threshold is arbitrarily set by yours truly to: - not trigger it for fully unpacked git v0.99 history; - do trigger it for fully unpacked git v1.0.0 history; - not trigger it for incremental update to git v1.0.0 starting from fully packed git v0.99 history. This patch does not add invocation of the "auto repacking". It is left to key Porcelain commands that could produce tons of loose objects to add a call to "git gc --auto" after they are done with their work. 
Signed-off-by: Junio C Hamano --- builtin-gc.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/builtin-gc.c b/builtin-gc.c index 9397482610..093b3dda9f 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -20,6 +20,7 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]"; static int pack_refs = 1; static int aggressive_window = -1; +static int gc_auto_threshold = 6700; #define MAX_ADD 10 static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL}; @@ -28,6 +29,8 @@ static const char *argv_repack[MAX_ADD] = {"repack", "-a", "-d", "-l", NULL}; static const char *argv_prune[] = {"prune", NULL}; static const char *argv_rerere[] = {"rerere", "gc", NULL}; +static const char *argv_repack_auto[] = {"repack", "-d", "-l", NULL}; + static int gc_config(const char *var, const char *value) { if (!strcmp(var, "gc.packrefs")) { @@ -41,6 +44,10 @@ static int gc_config(const char *var, const char *value) aggressive_window = git_config_int(var, value); return 0; } + if (!strcmp(var, "gc.auto")) { + gc_auto_threshold = git_config_int(var, value); + return 0; + } return git_default_config(var, value); } @@ -57,10 +64,49 @@ static void append_option(const char **cmd, const char *opt, int max_length) cmd[i] = NULL; } +static int need_to_gc(void) +{ + /* + * Quickly check if a "gc" is needed, by estimating how + * many loose objects there are. Because SHA-1 is evenly + * distributed, we can check only one and get a reasonable + * estimate. 
+ */ + char path[PATH_MAX]; + const char *objdir = get_object_directory(); + DIR *dir; + struct dirent *ent; + int auto_threshold; + int num_loose = 0; + int needed = 0; + + if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) { + warning("insanely long object directory %.*s", 50, objdir); + return 0; + } + dir = opendir(path); + if (!dir) + return 0; + + auto_threshold = (gc_auto_threshold + 255) / 256; + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != 38 || + ent->d_name[38] != '\0') + continue; + if (++num_loose > auto_threshold) { + needed = 1; + break; + } + } + closedir(dir); + return needed; +} + int cmd_gc(int argc, const char **argv, const char *prefix) { int i; int prune = 0; + int auto_gc = 0; char buf[80]; git_config(gc_config); @@ -82,12 +128,28 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } continue; } - /* perhaps other parameters later... */ + if (!strcmp(arg, "--auto")) { + if (gc_auto_threshold <= 0) + return 0; + auto_gc = 1; + continue; + } break; } if (i != argc) usage(builtin_gc_usage); + if (auto_gc) { + /* + * Auto-gc should be least intrusive as possible. + */ + prune = 0; + for (i = 0; i < ARRAY_SIZE(argv_repack_auto); i++) + argv_repack[i] = argv_repack_auto[i]; + if (!need_to_gc()) + return 0; + } + if (pack_refs && run_command_v_opt(argv_pack_refs, RUN_GIT_CMD)) return error(FAILED_RUN, argv_pack_refs[0]); -- cgit v1.2.3 From d4bb43ee273528064192848165f93f8fc3512be1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 5 Sep 2007 14:59:59 -0700 Subject: Invoke "git gc --auto" from commit, merge, am and rebase. The point of auto gc is to pack new objects created in loose format, so a good rule of thumb is where we do update-ref after creating a new commit. 
Signed-off-by: Junio C Hamano --- git-am.sh | 2 ++ git-commit.sh | 1 + git-merge.sh | 1 + git-rebase--interactive.sh | 2 ++ 4 files changed, 6 insertions(+) diff --git a/git-am.sh b/git-am.sh index 6809aa07f6..4db4701c9e 100755 --- a/git-am.sh +++ b/git-am.sh @@ -466,6 +466,8 @@ do "$GIT_DIR"/hooks/post-applypatch fi + git gc --auto + go_next done diff --git a/git-commit.sh b/git-commit.sh index 1d04f1ff31..d22d35eadc 100755 --- a/git-commit.sh +++ b/git-commit.sh @@ -652,6 +652,7 @@ git rerere if test "$ret" = 0 then + git gc --auto if test -x "$GIT_DIR"/hooks/post-commit then "$GIT_DIR"/hooks/post-commit diff --git a/git-merge.sh b/git-merge.sh index 3a01db0d75..697bec24fa 100755 --- a/git-merge.sh +++ b/git-merge.sh @@ -82,6 +82,7 @@ finish () { ;; *) git update-ref -m "$rlogm" HEAD "$1" "$head" || exit 1 + git gc --auto ;; esac ;; diff --git a/git-rebase--interactive.sh b/git-rebase--interactive.sh index abc2b1c3e0..8258b7adf9 100755 --- a/git-rebase--interactive.sh +++ b/git-rebase--interactive.sh @@ -307,6 +307,8 @@ do_next () { rm -rf "$DOTEST" && warn "Successfully rebased and updated $HEADNAME." + git gc --auto + exit } -- cgit v1.2.3 From 000dfd3f6e3f61e15ccfd4cecb3a51624adfbf38 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 16 Sep 2007 23:15:19 -0700 Subject: Export matches_pack_name() and fix its return value The function sounds boolean; make it behave as one, not "0 for success, non-zero for failure". 
Signed-off-by: Junio C Hamano --- cache.h | 1 + sha1_file.c | 14 +++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cache.h b/cache.h index 70abbd59bf..3fa5b8e6a8 100644 --- a/cache.h +++ b/cache.h @@ -529,6 +529,7 @@ extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsign extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); +extern int matches_pack_name(struct packed_git *p, const char *name); /* Dumb servers support */ extern int update_server_info(int); diff --git a/sha1_file.c b/sha1_file.c index 9978a58da6..5801c3e71b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1684,22 +1684,22 @@ off_t find_pack_entry_one(const unsigned char *sha1, return 0; } -static int matches_pack_name(struct packed_git *p, const char *ig) +int matches_pack_name(struct packed_git *p, const char *name) { const char *last_c, *c; - if (!strcmp(p->pack_name, ig)) - return 0; + if (!strcmp(p->pack_name, name)) + return 1; for (c = p->pack_name, last_c = c; *c;) if (*c == '/') last_c = ++c; else ++c; - if (!strcmp(last_c, ig)) - return 0; + if (!strcmp(last_c, name)) + return 1; - return 1; + return 0; } static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, const char **ignore_packed) @@ -1717,7 +1717,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e, cons if (ignore_packed) { const char **ig; for (ig = ignore_packed; *ig; ig++) - if (!matches_pack_name(p, *ig)) + if (matches_pack_name(p, *ig)) break; if (*ig) goto next; -- cgit v1.2.3 From 08cdfb13374f31b0c1c47444f55042e7b72c3190 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 16 Sep 2007 23:20:07 -0700 Subject: 
pack-objects --keep-unreachable This new option is meant to be used in conjunction with the options "git repack -a -d" usually invokes the underlying pack-objects with. When this option is given, objects unreachable from the refs in packs named with --unpacked= option are added to the resulting pack, in addition to the reachable objects that are not in packs marked with *.keep files. Signed-off-by: Junio C Hamano --- builtin-pack-objects.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 12509faa77..ba7c8da5bf 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -21,7 +21,7 @@ git-pack-objects [{ -q | --progress | --all-progress }] \n\ [--window=N] [--window-memory=N] [--depth=N] \n\ [--no-reuse-delta] [--no-reuse-object] [--delta-base-offset] \n\ [--non-empty] [--revs [--unpacked | --all]*] [--reflog] \n\ - [--stdout | base-name] [object.sha1, OBJ_COMMIT, NULL, 0); + commit->object.flags |= OBJECT_ADDED; } static void show_object(struct object_array_entry *p) { add_preferred_base_object(p->name); add_object_entry(p->item->sha1, p->item->type, p->name, 0); + p->item->flags |= OBJECT_ADDED; } static void show_edge(struct commit *commit) @@ -1641,6 +1645,86 @@ static void show_edge(struct commit *commit) add_preferred_base(commit->object.sha1); } +struct in_pack_object { + off_t offset; + struct object *object; +}; + +struct in_pack { + int alloc; + int nr; + struct in_pack_object *array; +}; + +static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack) +{ + in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->sha1, p); + in_pack->array[in_pack->nr].object = object; + in_pack->nr++; +} + +/* + * Compare the objects in the offset order, in order to emulate the + * "git-rev-list --objects" output that produced the pack originally. 
+ */ +static int ofscmp(const void *a_, const void *b_) +{ + struct in_pack_object *a = (struct in_pack_object *)a_; + struct in_pack_object *b = (struct in_pack_object *)b_; + + if (a->offset < b->offset) + return -1; + else if (a->offset > b->offset) + return 1; + else + return hashcmp(a->object->sha1, b->object->sha1); +} + +static void add_objects_in_unpacked_packs(struct rev_info *revs) +{ + struct packed_git *p; + struct in_pack in_pack; + uint32_t i; + + memset(&in_pack, 0, sizeof(in_pack)); + + for (p = packed_git; p; p = p->next) { + const unsigned char *sha1; + struct object *o; + + for (i = 0; i < revs->num_ignore_packed; i++) { + if (matches_pack_name(p, revs->ignore_packed[i])) + break; + } + if (revs->num_ignore_packed <= i) + continue; + if (open_pack_index(p)) + die("cannot open pack index"); + + ALLOC_GROW(in_pack.array, + in_pack.nr + p->num_objects, + in_pack.alloc); + + for (i = 0; i < p->num_objects; i++) { + sha1 = nth_packed_object_sha1(p, i); + o = lookup_unknown_object(sha1); + if (!(o->flags & OBJECT_ADDED)) + mark_in_pack_object(o, p, &in_pack); + o->flags |= OBJECT_ADDED; + } + } + + if (in_pack.nr) { + qsort(in_pack.array, in_pack.nr, sizeof(in_pack.array[0]), + ofscmp); + for (i = 0; i < in_pack.nr; i++) { + struct object *o = in_pack.array[i].object; + add_object_entry(o->sha1, o->type, "", 0); + } + } + free(in_pack.array); +} + static void get_object_list(int ac, const char **av) { struct rev_info revs; @@ -1672,6 +1756,9 @@ static void get_object_list(int ac, const char **av) prepare_revision_walk(&revs); mark_edges_uninteresting(revs.commits, &revs, show_edge); traverse_commit_list(&revs, show_commit, show_object); + + if (keep_unreachable) + add_objects_in_unpacked_packs(&revs); } static int adjust_perm(const char *path, mode_t mode) @@ -1789,6 +1876,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) use_internal_rev_list = 1; continue; } + if (!strcmp("--keep-unreachable", arg)) { + keep_unreachable = 1; 
+ continue; + } if (!strcmp("--unpacked", arg) || !prefixcmp(arg, "--unpacked=") || !strcmp("--reflog", arg) || -- cgit v1.2.3 From 65aa53029a32a1ad36523f3e7a1bb933d4494805 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 16 Sep 2007 23:24:07 -0700 Subject: repack -A -d: use --keep-unreachable when repacking This is a safer variant of "repack -a -d" that does not drop unreachable objects that are in packs. Signed-off-by: Junio C Hamano --- git-repack.sh | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/git-repack.sh b/git-repack.sh index 156c5e8f4a..633b902e26 100755 --- a/git-repack.sh +++ b/git-repack.sh @@ -3,17 +3,19 @@ # Copyright (c) 2005 Linus Torvalds # -USAGE='[-a] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]' +USAGE='[-a|-A] [-d] [-f] [-l] [-n] [-q] [--max-pack-size=N] [--window=N] [--window-memory=N] [--depth=N]' SUBDIRECTORY_OK='Yes' . git-sh-setup -no_update_info= all_into_one= remove_redundant= +no_update_info= all_into_one= remove_redundant= keep_unreachable= local= quiet= no_reuse= extra= while case "$#" in 0) break ;; esac do case "$1" in -n) no_update_info=t ;; -a) all_into_one=t ;; + -A) all_into_one=t + keep_unreachable=--keep-unreachable ;; -d) remove_redundant=t ;; -q) quiet=-q ;; -f) no_reuse=--no-reuse-object ;; @@ -59,7 +61,13 @@ case ",$all_into_one," in fi done fi - [ -z "$args" ] && args='--unpacked --incremental' + if test -z "$args" + then + args='--unpacked --incremental' + elif test -n "$keep_unreachable" + then + args="$args $keep_unreachable" + fi ;; esac -- cgit v1.2.3 From caf9de2f46471dc25180bf519c07537c00a68dda Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 17 Sep 2007 00:37:06 -0700 Subject: git-gc --auto: move threshold check to need_to_gc() function. That is where we decide if we are going to run gc automatically. 
Signed-off-by: Junio C Hamano --- builtin-gc.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/builtin-gc.c b/builtin-gc.c index 093b3dda9f..f046a2a665 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -80,6 +80,13 @@ static int need_to_gc(void) int num_loose = 0; int needed = 0; + /* + * Setting gc.auto to 0 or negative can disable the + * automatic gc + */ + if (gc_auto_threshold <= 0) + return 0; + if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) { warning("insanely long object directory %.*s", 50, objdir); return 0; @@ -129,8 +136,6 @@ int cmd_gc(int argc, const char **argv, const char *prefix) continue; } if (!strcmp(arg, "--auto")) { - if (gc_auto_threshold <= 0) - return 0; auto_gc = 1; continue; } -- cgit v1.2.3 From e9831e83e063844b90cf9e525d0003715dd8b395 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 17 Sep 2007 00:39:52 -0700 Subject: git-gc --auto: add documentation. This documents the auto-packing of loose objects performed by git-gc --auto. Signed-off-by: Junio C Hamano --- Documentation/config.txt | 7 +++++++ Documentation/git-gc.txt | 11 ++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 866e0534b8..6b6553d9da 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -439,6 +439,13 @@ gc.aggressiveWindow:: algorithm used by 'git gc --aggressive'. This defaults to 10. +gc.auto:: + When there are approximately more than this many loose + objects in the repository, `git gc --auto` will pack them. + Some Porcelain commands use this command to perform a + light-weight garbage collection from time to time. Setting + this to 0 disables it. 
+ gc.packrefs:: `git gc` does not run `git pack-refs` in a bare repository by default so that older dumb-transport clients can still fetch diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index c7742ca963..40c1ce4a21 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -8,7 +8,7 @@ git-gc - Cleanup unnecessary files and optimize the local repository SYNOPSIS -------- -'git-gc' [--prune] [--aggressive] +'git-gc' [--prune] [--aggressive] [--auto] DESCRIPTION ----------- @@ -43,6 +43,15 @@ OPTIONS persistent, so this option only needs to be used occasionally; every few hundred changesets or so. +--auto:: + With this option, `git gc` checks if there are too many + loose objects in the repository and runs + gitlink:git-repack[1] with `-d -l` option to pack them. + The threshold is set with `gc.auto` configuration + variable, and can be disabled by setting it to 0. Some + Porcelain commands use this after they perform operation + that could create many loose objects automatically. + Configuration ------------- -- cgit v1.2.3 From a087cc9819d5790a0aeb42c2bd74f781c555e8d6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 17 Sep 2007 00:44:17 -0700 Subject: git-gc --auto: protect ourselves from accumulated cruft Deciding to run "repack -d -l" when there are too many loose objects would backfire when there are too many loose objects that are unreachable, because repacking that way would never improve the situation. Detect that case by checking the number of loose objects again after automatic garbage collection runs, and issue a warning to run "prune" manually. 
Signed-off-by: Junio C Hamano --- builtin-gc.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/builtin-gc.c b/builtin-gc.c index f046a2a665..bf29f5e1a2 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -64,7 +64,7 @@ static void append_option(const char **cmd, const char *opt, int max_length) cmd[i] = NULL; } -static int need_to_gc(void) +static int too_many_loose_objects(void) { /* * Quickly check if a "gc" is needed, by estimating how @@ -80,13 +80,6 @@ static int need_to_gc(void) int num_loose = 0; int needed = 0; - /* - * Setting gc.auto to 0 or negative can disable the - * automatic gc - */ - if (gc_auto_threshold <= 0) - return 0; - if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) { warning("insanely long object directory %.*s", 50, objdir); return 0; @@ -109,6 +102,18 @@ static int need_to_gc(void) return needed; } +static int need_to_gc(void) +{ + /* + * Setting gc.auto to 0 or negative can disable the + * automatic gc + */ + if (gc_auto_threshold <= 0) + return 0; + + return too_many_loose_objects(); +} + int cmd_gc(int argc, const char **argv, const char *prefix) { int i; @@ -170,5 +175,9 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (run_command_v_opt(argv_rerere, RUN_GIT_CMD)) return error(FAILED_RUN, argv_rerere[0]); + if (auto_gc && too_many_loose_objects()) + warning("There are too many unreachable loose objects; " + "run 'git prune' to remove them."); + return 0; } -- cgit v1.2.3 From 95143f9e686dee144e0ff4a20190b923e20e1b64 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 17 Sep 2007 00:48:39 -0700 Subject: git-gc --auto: restructure the way "repack" command line is built. We used to build the command line to run repack outside of need_to_gc() but with the next patch we would want to tweak the command line depending on the nature of need. 
Signed-off-by: Junio C Hamano --- builtin-gc.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/builtin-gc.c b/builtin-gc.c index bf29f5e1a2..34ce35befb 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -29,8 +29,6 @@ static const char *argv_repack[MAX_ADD] = {"repack", "-a", "-d", "-l", NULL}; static const char *argv_prune[] = {"prune", NULL}; static const char *argv_rerere[] = {"rerere", "gc", NULL}; -static const char *argv_repack_auto[] = {"repack", "-d", "-l", NULL}; - static int gc_config(const char *var, const char *value) { if (!strcmp(var, "gc.packrefs")) { @@ -104,6 +102,8 @@ static int too_many_loose_objects(void) static int need_to_gc(void) { + int ac = 0; + /* * Setting gc.auto to 0 or negative can disable the * automatic gc @@ -111,7 +111,14 @@ static int need_to_gc(void) if (gc_auto_threshold <= 0) return 0; - return too_many_loose_objects(); + if (!too_many_loose_objects()) + return 0; + + argv_repack[ac++] = "repack"; + argv_repack[ac++] = "-d"; + argv_repack[ac++] = "-l"; + argv_repack[ac++] = NULL; + return 1; } int cmd_gc(int argc, const char **argv, const char *prefix) @@ -154,8 +161,6 @@ int cmd_gc(int argc, const char **argv, const char *prefix) * Auto-gc should be least intrusive as possible. */ prune = 0; - for (i = 0; i < ARRAY_SIZE(argv_repack_auto); i++) - argv_repack[i] = argv_repack_auto[i]; if (!need_to_gc()) return 0; } -- cgit v1.2.3 From 17815501a8f95c080891acd9537514adfe17c80e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 17 Sep 2007 00:55:13 -0700 Subject: git-gc --auto: run "repack -A -d -l" as necessary. This teaches "git-gc --auto" to consolidate many packs into one without losing unreachable objects in them by using "repack -A" when there are too many packfiles that are not marked with *.keep in the repository. gc.autopacklimit configuration can be used to set the maximum number of packs a repository is allowed to have before this mechanism kicks in. 
Signed-off-by: Junio C Hamano --- Documentation/config.txt | 6 +++++ Documentation/git-gc.txt | 7 +++++- builtin-gc.c | 60 +++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 66 insertions(+), 7 deletions(-) diff --git a/Documentation/config.txt b/Documentation/config.txt index 6b6553d9da..b0390f82b8 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -446,6 +446,12 @@ gc.auto:: light-weight garbage collection from time to time. Setting this to 0 disables it. +gc.autopacklimit:: + When there are more than this many packs that are not + marked with `*.keep` file in the repository, `git gc + --auto` consolidates them into one larger pack. Setting + this to 0 disables this. + gc.packrefs:: `git gc` does not run `git pack-refs` in a bare repository by default so that older dumb-transport clients can still fetch diff --git a/Documentation/git-gc.txt b/Documentation/git-gc.txt index 40c1ce4a21..b9d5660eac 100644 --- a/Documentation/git-gc.txt +++ b/Documentation/git-gc.txt @@ -47,10 +47,15 @@ OPTIONS With this option, `git gc` checks if there are too many loose objects in the repository and runs gitlink:git-repack[1] with `-d -l` option to pack them. - The threshold is set with `gc.auto` configuration + The threshold for loose objects is set with `gc.auto` configuration variable, and can be disabled by setting it to 0. Some Porcelain commands use this after they perform operation that could create many loose objects automatically. + Additionally, when too many packs are present, + they are consolidated into one larger pack by running + the `git-repack` command with `-A` option. The + threshold for number of packs is set with + `gc.autopacklimit` configuration variable. 
Configuration ------------- diff --git a/builtin-gc.c b/builtin-gc.c index 34ce35befb..23ad2b6a21 100644 --- a/builtin-gc.c +++ b/builtin-gc.c @@ -21,6 +21,7 @@ static const char builtin_gc_usage[] = "git-gc [--prune] [--aggressive]"; static int pack_refs = 1; static int aggressive_window = -1; static int gc_auto_threshold = 6700; +static int gc_auto_pack_limit = 20; #define MAX_ADD 10 static const char *argv_pack_refs[] = {"pack-refs", "--all", "--prune", NULL}; @@ -46,6 +47,10 @@ static int gc_config(const char *var, const char *value) gc_auto_threshold = git_config_int(var, value); return 0; } + if (!strcmp(var, "gc.autopacklimit")) { + gc_auto_pack_limit = git_config_int(var, value); + return 0; + } return git_default_config(var, value); } @@ -78,6 +83,9 @@ static int too_many_loose_objects(void) int num_loose = 0; int needed = 0; + if (gc_auto_threshold <= 0) + return 0; + if (sizeof(path) <= snprintf(path, sizeof(path), "%s/17", objdir)) { warning("insanely long object directory %.*s", 50, objdir); return 0; @@ -100,21 +108,61 @@ static int too_many_loose_objects(void) return needed; } +static int too_many_packs(void) +{ + struct packed_git *p; + int cnt; + + if (gc_auto_pack_limit <= 0) + return 0; + + prepare_packed_git(); + for (cnt = 0, p = packed_git; p; p = p->next) { + char path[PATH_MAX]; + size_t len; + int keep; + + if (!p->pack_local) + continue; + len = strlen(p->pack_name); + if (PATH_MAX <= len + 1) + continue; /* oops, give up */ + memcpy(path, p->pack_name, len-5); + memcpy(path + len - 5, ".keep", 6); + keep = access(p->pack_name, F_OK) && (errno == ENOENT); + if (keep) + continue; + /* + * Perhaps check the size of the pack and count only + * very small ones here? + */ + cnt++; + } + return gc_auto_pack_limit <= cnt; +} + static int need_to_gc(void) { int ac = 0; /* - * Setting gc.auto to 0 or negative can disable the - * automatic gc + * Setting gc.auto and gc.autopacklimit to 0 or negative can + * disable the automatic gc. 
*/ - if (gc_auto_threshold <= 0) - return 0; - - if (!too_many_loose_objects()) + if (gc_auto_threshold <= 0 && gc_auto_pack_limit <= 0) return 0; + /* + * If there are too many loose objects, but not too many + * packs, we run "repack -d -l". If there are too many packs, + * we run "repack -A -d -l". Otherwise we tell the caller + * there is no need. + */ argv_repack[ac++] = "repack"; + if (too_many_packs()) + argv_repack[ac++] = "-A"; + else if (!too_many_loose_objects()) + return 0; argv_repack[ac++] = "-d"; argv_repack[ac++] = "-l"; argv_repack[ac++] = NULL; -- cgit v1.2.3