diff options
-rw-r--r-- | Documentation/git-maintenance.txt | 15 | ||||
-rw-r--r-- | builtin/gc.c | 97 | ||||
-rwxr-xr-x | t/t7900-maintenance.sh | 39 |
3 files changed, 151 insertions, 0 deletions
diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 12668fccf7..fc95eb594f 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -70,6 +70,21 @@ gc:: be disruptive in some situations, as it deletes stale data. See linkgit:git-gc[1] for more details on garbage collection in Git. +loose-objects:: + The `loose-objects` job cleans up loose objects and places them into + pack-files. In order to prevent race conditions with concurrent Git + commands, it follows a two-step process. First, it deletes any loose + objects that already exist in a pack-file; concurrent Git processes + will examine the pack-file for the object data instead of the loose + object. Second, it creates a new pack-file (starting with "loose-") + containing a batch of loose objects. The batch size is limited to 50 + thousand objects to prevent the job from taking too long on a + repository with many loose objects. The `gc` task writes unreachable + objects as loose objects to be cleaned up by a later step only if + they are not re-added to a pack-file; for this reason it is not + advisable to enable both the `loose-objects` and `gc` tasks at the + same time. + OPTIONS ------- --auto:: diff --git a/builtin/gc.c b/builtin/gc.c index 5e469488f4..c9db8555b9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -880,6 +880,98 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts) return run_command(&child); } +static int prune_packed(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_push(&child.args, "prune-packed"); + + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + + return !!run_command(&child); +} + +struct write_loose_object_data { + FILE *in; + int count; + int batch_size; +}; + +static int bail_on_loose(const struct object_id *oid, + const char *path, + void *data) +{ + return 1; +} + +static int write_loose_object_to_stdin(const struct object_id *oid, + const char *path, + void *data) +{ + struct write_loose_object_data *d = (struct write_loose_object_data *)data; + + fprintf(d->in, "%s\n", oid_to_hex(oid)); + + return ++(d->count) > d->batch_size; +} + +static int pack_loose(struct maintenance_run_opts *opts) +{ + struct repository *r = the_repository; + int result = 0; + struct write_loose_object_data data; + struct child_process pack_proc = CHILD_PROCESS_INIT; + + /* + * Do not start pack-objects process + * if there are no loose objects. + */ + if (!for_each_loose_file_in_objdir(r->objects->odb->path, + bail_on_loose, + NULL, NULL, NULL)) + return 0; + + pack_proc.git_cmd = 1; + + strvec_push(&pack_proc.args, "pack-objects"); + if (opts->quiet) + strvec_push(&pack_proc.args, "--quiet"); + strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path); + + pack_proc.in = -1; + + if (start_command(&pack_proc)) { + error(_("failed to start 'git pack-objects' process")); + return 1; + } + + data.in = xfdopen(pack_proc.in, "w"); + data.count = 0; + data.batch_size = 50000; + + for_each_loose_file_in_objdir(r->objects->odb->path, + write_loose_object_to_stdin, + NULL, + NULL, + &data); + + fclose(data.in); + + if (finish_command(&pack_proc)) { + error(_("failed to finish 'git pack-objects' process")); + result = 1; + } + + return result; +} + +static int maintenance_task_loose_objects(struct maintenance_run_opts *opts) +{ + return prune_packed(opts) || pack_loose(opts); +} + typedef int maintenance_task_fn(struct maintenance_run_opts *opts); /* @@ -901,6 +993,7 @@ struct maintenance_task { enum maintenance_task_label { TASK_PREFETCH, + TASK_LOOSE_OBJECTS, TASK_GC, TASK_COMMIT_GRAPH, @@ -913,6 +1006,10 @@ static struct maintenance_task tasks[] = { "prefetch", maintenance_task_prefetch, }, + [TASK_LOOSE_OBJECTS] = { + "loose-objects", + maintenance_task_loose_objects, + }, [TASK_GC] = { "gc", maintenance_task_gc, diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 045524e6ad..b3fc7c8670 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -88,4 +88,43 @@ test_expect_success 'prefetch multiple remotes' ' test_cmp_rev refs/remotes/remote2/two refs/prefetch/remote2/two ' +test_expect_success 'loose-objects task' ' + # Repack everything so we know the state of the object dir + git repack -adk && + + # Hack to stop maintenance from running during "git commit" + echo in use >.git/objects/maintenance.lock && + + # Assuming that "git commit" creates at least one loose object + test_commit create-loose-object && + rm .git/objects/maintenance.lock && + + ls .git/objects >obj-dir-before && + test_file_not_empty obj-dir-before && + ls .git/objects/pack/*.pack >packs-before && + test_line_count = 1 packs-before && + + # The first run creates a pack-file + # but does not delete loose objects. + git maintenance run --task=loose-objects && + ls .git/objects >obj-dir-between && + test_cmp obj-dir-before obj-dir-between && + ls .git/objects/pack/*.pack >packs-between && + test_line_count = 2 packs-between && + ls .git/objects/pack/loose-*.pack >loose-packs && + test_line_count = 1 loose-packs && + + # The second run deletes loose objects + # but does not create a pack-file. + git maintenance run --task=loose-objects && + ls .git/objects >obj-dir-after && + cat >expect <<-\EOF && + info + pack + EOF + test_cmp expect obj-dir-after && + ls .git/objects/pack/*.pack >packs-after && + test_cmp packs-between packs-after +' + test_done |