summaryrefslogtreecommitdiff
path: root/builtin/gc.c
diff options
context:
space:
mode:
authorLibravatar Derrick Stolee <dstolee@microsoft.com>2020-09-25 12:33:32 +0000
committerLibravatar Junio C Hamano <gitster@pobox.com>2020-09-25 10:53:04 -0700
commit252cfb7cb86a33f7740c799748ee6c586931c7bc (patch)
tree448b62e3fa3b920d3da7c4484be34dbc95b56f00 /builtin/gc.c
parentmaintenance: add prefetch task (diff)
downloadtgif-252cfb7cb86a33f7740c799748ee6c586931c7bc.tar.xz
maintenance: add loose-objects task
One goal of background maintenance jobs is to allow a user to disable auto-gc (gc.auto=0) but keep their repository in a clean state. Without any cleanup, loose objects will clutter the object database and slow operations. In addition, the loose objects will take up extra space because they are not stored with deltas against similar objects. Create a 'loose-objects' task for the 'git maintenance run' command. This helps clean up loose objects without disrupting concurrent Git commands using the following sequence of events: 1. Run 'git prune-packed' to delete any loose objects that exist in a pack-file. Concurrent commands will prefer the packed version of the object to the loose version. (Of course, there are exceptions for commands that specifically care about the location of an object. These are rare for a user to run on purpose, and we hope a user that has selected background maintenance will not be trying to do foreground maintenance.) 2. Run 'git pack-objects' on a batch of loose objects. These objects are grouped by scanning the loose object directories in lexicographic order until listing all loose objects -or- reaching 50,000 objects. This is more than enough if the loose objects are created only by a user doing normal development. We noticed users with _millions_ of loose objects because VFS for Git downloads blobs on-demand when a file read operation requires populating a virtual file. This step is based on a similar step in Scalar [1] and VFS for Git. [1] https://github.com/microsoft/scalar/blob/master/Scalar.Common/Maintenance/LooseObjectsStep.cs Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/gc.c')
-rw-r--r--builtin/gc.c97
1 files changed, 97 insertions, 0 deletions
diff --git a/builtin/gc.c b/builtin/gc.c
index 5e469488f4..c9db8555b9 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -880,6 +880,98 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts)
return run_command(&child);
}
+static int prune_packed(struct maintenance_run_opts *opts)
+{
+ struct child_process child = CHILD_PROCESS_INIT;
+
+ child.git_cmd = 1;
+ strvec_push(&child.args, "prune-packed");
+
+ if (opts->quiet)
+ strvec_push(&child.args, "--quiet");
+
+ return !!run_command(&child);
+}
+
+struct write_loose_object_data {
+ FILE *in;
+ int count;
+ int batch_size;
+};
+
+static int bail_on_loose(const struct object_id *oid,
+ const char *path,
+ void *data)
+{
+ return 1;
+}
+
+static int write_loose_object_to_stdin(const struct object_id *oid,
+ const char *path,
+ void *data)
+{
+ struct write_loose_object_data *d = (struct write_loose_object_data *)data;
+
+ fprintf(d->in, "%s\n", oid_to_hex(oid));
+
+ return ++(d->count) > d->batch_size;
+}
+
+static int pack_loose(struct maintenance_run_opts *opts)
+{
+ struct repository *r = the_repository;
+ int result = 0;
+ struct write_loose_object_data data;
+ struct child_process pack_proc = CHILD_PROCESS_INIT;
+
+ /*
+ * Do not start pack-objects process
+ * if there are no loose objects.
+ */
+ if (!for_each_loose_file_in_objdir(r->objects->odb->path,
+ bail_on_loose,
+ NULL, NULL, NULL))
+ return 0;
+
+ pack_proc.git_cmd = 1;
+
+ strvec_push(&pack_proc.args, "pack-objects");
+ if (opts->quiet)
+ strvec_push(&pack_proc.args, "--quiet");
+ strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path);
+
+ pack_proc.in = -1;
+
+ if (start_command(&pack_proc)) {
+ error(_("failed to start 'git pack-objects' process"));
+ return 1;
+ }
+
+ data.in = xfdopen(pack_proc.in, "w");
+ data.count = 0;
+ data.batch_size = 50000;
+
+ for_each_loose_file_in_objdir(r->objects->odb->path,
+ write_loose_object_to_stdin,
+ NULL,
+ NULL,
+ &data);
+
+ fclose(data.in);
+
+ if (finish_command(&pack_proc)) {
+ error(_("failed to finish 'git pack-objects' process"));
+ result = 1;
+ }
+
+ return result;
+}
+
+static int maintenance_task_loose_objects(struct maintenance_run_opts *opts)
+{
+ return prune_packed(opts) || pack_loose(opts);
+}
+
typedef int maintenance_task_fn(struct maintenance_run_opts *opts);
/*
@@ -901,6 +993,7 @@ struct maintenance_task {
enum maintenance_task_label {
TASK_PREFETCH,
+ TASK_LOOSE_OBJECTS,
TASK_GC,
TASK_COMMIT_GRAPH,
@@ -913,6 +1006,10 @@ static struct maintenance_task tasks[] = {
"prefetch",
maintenance_task_prefetch,
},
+ [TASK_LOOSE_OBJECTS] = {
+ "loose-objects",
+ maintenance_task_loose_objects,
+ },
[TASK_GC] = {
"gc",
maintenance_task_gc,