summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-maintenance.txt15
-rw-r--r--builtin/gc.c97
-rwxr-xr-xt/t7900-maintenance.sh39
3 files changed, 151 insertions, 0 deletions
diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt
index 12668fccf7..fc95eb594f 100644
--- a/Documentation/git-maintenance.txt
+++ b/Documentation/git-maintenance.txt
@@ -70,6 +70,21 @@ gc::
be disruptive in some situations, as it deletes stale data. See
linkgit:git-gc[1] for more details on garbage collection in Git.
+loose-objects::
+ The `loose-objects` job cleans up loose objects and places them into
+ pack-files. In order to prevent race conditions with concurrent Git
+ commands, it follows a two-step process. First, it deletes any loose
+ objects that already exist in a pack-file; concurrent Git processes
+ will examine the pack-file for the object data instead of the loose
+ object. Second, it creates a new pack-file (starting with "loose-")
+ containing a batch of loose objects. The batch size is limited to 50
+ thousand objects to prevent the job from taking too long on a
+ repository with many loose objects. The `gc` task writes unreachable
+ objects as loose objects to be cleaned up by a later step only if
+ they are not re-added to a pack-file; for this reason it is not
+ advisable to enable both the `loose-objects` and `gc` tasks at the
+ same time.
+
OPTIONS
-------
--auto::
diff --git a/builtin/gc.c b/builtin/gc.c
index 5e469488f4..c9db8555b9 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -880,6 +880,98 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts)
return run_command(&child);
}
+/*
+ * Run "git prune-packed" as a child process, forwarding --quiet when
+ * opts->quiet is set. Per the task documentation this is the step that
+ * deletes loose objects which already exist in a pack-file.
+ *
+ * Returns 0 if the child command succeeded and 1 otherwise.
+ */
+static int prune_packed(struct maintenance_run_opts *opts)
+{
+	struct child_process cmd = CHILD_PROCESS_INIT;
+	int status;
+
+	strvec_push(&cmd.args, "prune-packed");
+	if (opts->quiet)
+		strvec_push(&cmd.args, "--quiet");
+	cmd.git_cmd = 1;
+
+	/* Normalize any non-zero exit status to 1. */
+	status = run_command(&cmd);
+	return status ? 1 : 0;
+}
+
+struct write_loose_object_data { /* state handed to write_loose_object_to_stdin() via its void *data argument */
+ FILE *in; /* stream the hex object IDs are written to (pack_loose() wraps the child's stdin pipe) */
+ int count; /* number of object IDs written so far in this batch */
+ int batch_size; /* threshold that count is compared against to end the batch */
+};
+
+/*
+ * Callback for for_each_loose_file_in_objdir() that ignores its arguments
+ * and unconditionally returns 1. pack_loose() uses it as a cheap probe:
+ * a non-zero result from the iteration means at least one loose object
+ * was visited.
+ */
+static int bail_on_loose(const struct object_id *oid, const char *path, void *data)
+{
+	int found_loose_object = 1;
+
+	return found_loose_object;
+}
+
+/*
+ * Callback for for_each_loose_file_in_objdir(): write the hex object ID of
+ * one loose object, followed by a newline, to the stream stored in `data`
+ * (which must point to a struct write_loose_object_data).
+ *
+ * Returns 0 to continue with the next loose object, or non-zero once
+ * batch_size object IDs have been written so that the iteration stops.
+ * The comparison is ">=" rather than ">" so that a batch never contains
+ * more than batch_size entries.
+ */
+static int write_loose_object_to_stdin(const struct object_id *oid,
+				       const char *path,
+				       void *data)
+{
+	struct write_loose_object_data *d = data;
+
+	fprintf(d->in, "%s\n", oid_to_hex(oid));
+
+	return ++(d->count) >= d->batch_size;
+}
+
+/*
+ * Pack a batch of loose objects by feeding their object IDs to
+ * "git pack-objects" on its standard input. The pack is written with the
+ * base name "<object dir>/pack/loose".
+ *
+ * Returns 0 on success (including when there are no loose objects and
+ * nothing is started), and 1 if pack-objects could not be started, its
+ * input could not be flushed, or it did not finish successfully.
+ */
+static int pack_loose(struct maintenance_run_opts *opts)
+{
+	struct repository *r = the_repository;
+	int result = 0;
+	struct write_loose_object_data data;
+	struct child_process pack_proc = CHILD_PROCESS_INIT;
+
+	/*
+	 * Do not start pack-objects process
+	 * if there are no loose objects.
+	 */
+	if (!for_each_loose_file_in_objdir(r->objects->odb->path,
+					   bail_on_loose,
+					   NULL, NULL, NULL))
+		return 0;
+
+	pack_proc.git_cmd = 1;
+
+	strvec_push(&pack_proc.args, "pack-objects");
+	if (opts->quiet)
+		strvec_push(&pack_proc.args, "--quiet");
+	strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path);
+
+	/* Request a pipe to the child's stdin; its fd is read back below. */
+	pack_proc.in = -1;
+
+	if (start_command(&pack_proc)) {
+		error(_("failed to start 'git pack-objects' process"));
+		return 1;
+	}
+
+	data.in = xfdopen(pack_proc.in, "w");
+	data.count = 0;
+	/* Cap on how many object IDs are sent to pack-objects in one run. */
+	data.batch_size = 50000;
+
+	for_each_loose_file_in_objdir(r->objects->odb->path,
+				      write_loose_object_to_stdin,
+				      NULL,
+				      NULL,
+				      &data);
+
+	/*
+	 * fclose() flushes the buffered object IDs into the pipe, so a
+	 * failure here means pack-objects did not receive the full list.
+	 * Record it, but still fall through to finish_command() so that
+	 * the child process is always waited for.
+	 */
+	if (fclose(data.in)) {
+		error(_("failed to write to 'git pack-objects' process"));
+		result = 1;
+	}
+
+	if (finish_command(&pack_proc)) {
+		error(_("failed to finish 'git pack-objects' process"));
+		result = 1;
+	}
+
+	return result;
+}
+
+/*
+ * Entry point for the "loose-objects" maintenance task: first run
+ * prune_packed(), and only if that succeeds run pack_loose().
+ *
+ * Returns 0 when both steps succeed and 1 as soon as either one fails.
+ */
+static int maintenance_task_loose_objects(struct maintenance_run_opts *opts)
+{
+	if (prune_packed(opts))
+		return 1;
+
+	return pack_loose(opts) ? 1 : 0;
+}
+
typedef int maintenance_task_fn(struct maintenance_run_opts *opts);
/*
@@ -901,6 +993,7 @@ struct maintenance_task {
enum maintenance_task_label {
TASK_PREFETCH,
+ TASK_LOOSE_OBJECTS,
TASK_GC,
TASK_COMMIT_GRAPH,
@@ -913,6 +1006,10 @@ static struct maintenance_task tasks[] = {
"prefetch",
maintenance_task_prefetch,
},
+ [TASK_LOOSE_OBJECTS] = {
+ "loose-objects",
+ maintenance_task_loose_objects,
+ },
[TASK_GC] = {
"gc",
maintenance_task_gc,
diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh
index 045524e6ad..b3fc7c8670 100755
--- a/t/t7900-maintenance.sh
+++ b/t/t7900-maintenance.sh
@@ -88,4 +88,43 @@ test_expect_success 'prefetch multiple remotes' '
test_cmp_rev refs/remotes/remote2/two refs/prefetch/remote2/two
'
+test_expect_success 'loose-objects task' '
+ # Repack everything into one pack so the object dir starts in a known state
+ git repack -adk &&
+
+ # Hack: create the maintenance lock file to stop maintenance from running during "git commit"
+ echo in use >.git/objects/maintenance.lock &&
+
+ # Assuming that "git commit" creates at least one loose object
+ test_commit create-loose-object &&
+ rm .git/objects/maintenance.lock &&
+
+ ls .git/objects >obj-dir-before &&
+ test_file_not_empty obj-dir-before &&
+ ls .git/objects/pack/*.pack >packs-before &&
+ test_line_count = 1 packs-before &&
+
+ # The first run creates a "loose-" pack-file but does not delete loose
+ # objects: the task prunes before it packs, so nothing is packed yet.
+ git maintenance run --task=loose-objects &&
+ ls .git/objects >obj-dir-between &&
+ test_cmp obj-dir-before obj-dir-between &&
+ ls .git/objects/pack/*.pack >packs-between &&
+ test_line_count = 2 packs-between &&
+ ls .git/objects/pack/loose-*.pack >loose-packs &&
+ test_line_count = 1 loose-packs &&
+
+ # The second run deletes the now-packed loose objects but does not create
+ # a pack-file; only the info and pack directories should remain.
+ git maintenance run --task=loose-objects &&
+ ls .git/objects >obj-dir-after &&
+ cat >expect <<-\EOF &&
+ info
+ pack
+ EOF
+ test_cmp expect obj-dir-after &&
+ ls .git/objects/pack/*.pack >packs-after &&
+ test_cmp packs-between packs-after
+'
+
test_done