diff options
Diffstat (limited to 'builtin/gc.c')
-rw-r--r-- | builtin/gc.c | 628 |
1 files changed, 612 insertions, 16 deletions
diff --git a/builtin/gc.c b/builtin/gc.c index 090959350e..4c24f41852 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -29,6 +29,9 @@ #include "tree.h" #include "promisor-remote.h" #include "refs.h" +#include "remote.h" +#include "object-store.h" +#include "exec-cmd.h" #define FAILED_RUN "failed to run %s" @@ -531,7 +534,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) const char *name; pid_t pid; int daemonized = 0; - int keep_base_pack = -1; + int keep_largest_pack = -1; timestamp_t dummy; struct option builtin_gc_options[] = { @@ -545,7 +548,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) OPT_BOOL_F(0, "force", &force, N_("force running gc even if there may be another gc running"), PARSE_OPT_NOCOMPLETE), - OPT_BOOL(0, "keep-largest-pack", &keep_base_pack, + OPT_BOOL(0, "keep-largest-pack", &keep_largest_pack, N_("repack all other packs except the largest pack")), OPT_END() }; @@ -622,8 +625,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } else { struct string_list keep_pack = STRING_LIST_INIT_NODUP; - if (keep_base_pack != -1) { - if (keep_base_pack) + if (keep_largest_pack != -1) { + if (keep_largest_pack) find_base_packs(&keep_pack, 0); } else if (big_pack_threshold) { find_base_packs(&keep_pack, big_pack_threshold); @@ -701,14 +704,51 @@ int cmd_gc(int argc, const char **argv, const char *prefix) return 0; } -static const char * const builtin_maintenance_run_usage[] = { - N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>]"), +static const char *const builtin_maintenance_run_usage[] = { + N_("git maintenance run [--auto] [--[no-]quiet] [--task=<task>] [--schedule]"), NULL }; +enum schedule_priority { + SCHEDULE_NONE = 0, + SCHEDULE_WEEKLY = 1, + SCHEDULE_DAILY = 2, + SCHEDULE_HOURLY = 3, +}; + +static enum schedule_priority parse_schedule(const char *value) +{ + if (!value) + return SCHEDULE_NONE; + if (!strcasecmp(value, "hourly")) + return SCHEDULE_HOURLY; + if (!strcasecmp(value, "daily")) + return SCHEDULE_DAILY; + if (!strcasecmp(value, "weekly")) + return SCHEDULE_WEEKLY; + return SCHEDULE_NONE; +} + +static int maintenance_opt_schedule(const struct option *opt, const char *arg, + int unset) +{ + enum schedule_priority *priority = opt->value; + + if (unset) + die(_("--no-schedule is not allowed")); + + *priority = parse_schedule(arg); + + if (!*priority) + die(_("unrecognized --schedule argument '%s'"), arg); + + return 0; +} + struct maintenance_run_opts { int auto_flag; int quiet; + enum schedule_priority schedule; }; /* Remember to update object flag allocation in object.h */ @@ -737,9 +777,15 @@ static int dfs_on_ref(const char *refname, commit = lookup_commit(the_repository, oid); if (!commit) return 0; - if (parse_commit(commit)) + if (parse_commit(commit) || + commit_graph_position(commit) != COMMIT_NOT_FROM_GRAPH) return 0; + data->num_not_in_graph++; + + if (data->num_not_in_graph >= data->limit) + return 1; + commit_list_append(commit, &stack); while (!result && stack) { @@ -786,7 +832,7 @@ static int should_write_commit_graph(void) result = for_each_ref(dfs_on_ref, &data); - clear_commit_marks_all(SEEN); + repo_clear_commit_marks(the_repository, SEEN); return result; } @@ -807,6 +853,10 @@ static int run_write_commit_graph(struct maintenance_run_opts *opts) static int maintenance_task_commit_graph(struct maintenance_run_opts *opts) { + prepare_repo_settings(the_repository); + if (!the_repository->settings.core_commit_graph) + return 0; + close_object_store(the_repository->objects); if (run_write_commit_graph(opts)) { error(_("failed to write commit-graph")); @@ -816,6 +866,51 @@ static int maintenance_task_commit_graph(struct maintenance_run_opts *opts) return 0; } +static int fetch_remote(const char *remote, struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_pushl(&child.args, "fetch", remote, "--prune", "--no-tags", + "--no-write-fetch-head", "--recurse-submodules=no", + "--refmap=", NULL); + + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + + strvec_pushf(&child.args, "+refs/heads/*:refs/prefetch/%s/*", remote); + + return !!run_command(&child); +} + +static int append_remote(struct remote *remote, void *cbdata) +{ + struct string_list *remotes = (struct string_list *)cbdata; + + string_list_append(remotes, remote->name); + return 0; +} + +static int maintenance_task_prefetch(struct maintenance_run_opts *opts) +{ + int result = 0; + struct string_list_item *item; + struct string_list remotes = STRING_LIST_INIT_DUP; + + if (for_each_remote(append_remote, &remotes)) { + error(_("failed to fill remotes")); + result = 1; + goto cleanup; + } + + for_each_string_list_item(item, &remotes) + result |= fetch_remote(item->string, opts); + +cleanup: + string_list_clear(&remotes, 0); + return result; +} + static int maintenance_task_gc(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -834,6 +929,268 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts) return run_command(&child); } +static int prune_packed(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_push(&child.args, "prune-packed"); + + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + + return !!run_command(&child); +} + +struct write_loose_object_data { + FILE *in; + int count; + int batch_size; +}; + +static int loose_object_auto_limit = 100; + +static int loose_object_count(const struct object_id *oid, + const char *path, + void *data) +{ + int *count = (int*)data; + if (++(*count) >= loose_object_auto_limit) + return 1; + return 0; +} + +static int loose_object_auto_condition(void) +{ + int count = 0; + + git_config_get_int("maintenance.loose-objects.auto", + &loose_object_auto_limit); + + if (!loose_object_auto_limit) + return 0; + if (loose_object_auto_limit < 0) + return 1; + + return for_each_loose_file_in_objdir(the_repository->objects->odb->path, + loose_object_count, + NULL, NULL, &count); +} + +static int bail_on_loose(const struct object_id *oid, + const char *path, + void *data) +{ + return 1; +} + +static int write_loose_object_to_stdin(const struct object_id *oid, + const char *path, + void *data) +{ + struct write_loose_object_data *d = (struct write_loose_object_data *)data; + + fprintf(d->in, "%s\n", oid_to_hex(oid)); + + return ++(d->count) > d->batch_size; +} + +static int pack_loose(struct maintenance_run_opts *opts) +{ + struct repository *r = the_repository; + int result = 0; + struct write_loose_object_data data; + struct child_process pack_proc = CHILD_PROCESS_INIT; + + /* + * Do not start pack-objects process + * if there are no loose objects. + */ + if (!for_each_loose_file_in_objdir(r->objects->odb->path, + bail_on_loose, + NULL, NULL, NULL)) + return 0; + + pack_proc.git_cmd = 1; + + strvec_push(&pack_proc.args, "pack-objects"); + if (opts->quiet) + strvec_push(&pack_proc.args, "--quiet"); + strvec_pushf(&pack_proc.args, "%s/pack/loose", r->objects->odb->path); + + pack_proc.in = -1; + + if (start_command(&pack_proc)) { + error(_("failed to start 'git pack-objects' process")); + return 1; + } + + data.in = xfdopen(pack_proc.in, "w"); + data.count = 0; + data.batch_size = 50000; + + for_each_loose_file_in_objdir(r->objects->odb->path, + write_loose_object_to_stdin, + NULL, + NULL, + &data); + + fclose(data.in); + + if (finish_command(&pack_proc)) { + error(_("failed to finish 'git pack-objects' process")); + result = 1; + } + + return result; +} + +static int maintenance_task_loose_objects(struct maintenance_run_opts *opts) +{ + return prune_packed(opts) || pack_loose(opts); +} + +static int incremental_repack_auto_condition(void) +{ + struct packed_git *p; + int enabled; + int incremental_repack_auto_limit = 10; + int count = 0; + + if (git_config_get_bool("core.multiPackIndex", &enabled) || + !enabled) + return 0; + + git_config_get_int("maintenance.incremental-repack.auto", + &incremental_repack_auto_limit); + + if (!incremental_repack_auto_limit) + return 0; + if (incremental_repack_auto_limit < 0) + return 1; + + for (p = get_packed_git(the_repository); + count < incremental_repack_auto_limit && p; + p = p->next) { + if (!p->multi_pack_index) + count++; + } + + return count >= incremental_repack_auto_limit; +} + +static int multi_pack_index_write(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_pushl(&child.args, "multi-pack-index", "write", NULL); + + if (opts->quiet) + strvec_push(&child.args, "--no-progress"); + + if (run_command(&child)) + return error(_("failed to write multi-pack-index")); + + return 0; +} + +static int multi_pack_index_expire(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_pushl(&child.args, "multi-pack-index", "expire", NULL); + + if (opts->quiet) + strvec_push(&child.args, "--no-progress"); + + close_object_store(the_repository->objects); + + if (run_command(&child)) + return error(_("'git multi-pack-index expire' failed")); + + return 0; +} + +#define TWO_GIGABYTES (INT32_MAX) + +static off_t get_auto_pack_size(void) +{ + /* + * The "auto" value is special: we optimize for + * one large pack-file (i.e. from a clone) and + * expect the rest to be small and they can be + * repacked quickly. + * + * The strategy we select here is to select a + * size that is one more than the second largest + * pack-file. This ensures that we will repack + * at least two packs if there are three or more + * packs. + */ + off_t max_size = 0; + off_t second_largest_size = 0; + off_t result_size; + struct packed_git *p; + struct repository *r = the_repository; + + reprepare_packed_git(r); + for (p = get_all_packs(r); p; p = p->next) { + if (p->pack_size > max_size) { + second_largest_size = max_size; + max_size = p->pack_size; + } else if (p->pack_size > second_largest_size) + second_largest_size = p->pack_size; + } + + result_size = second_largest_size + 1; + + /* But limit ourselves to a batch size of 2g */ + if (result_size > TWO_GIGABYTES) + result_size = TWO_GIGABYTES; + + return result_size; +} + +static int multi_pack_index_repack(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_pushl(&child.args, "multi-pack-index", "repack", NULL); + + if (opts->quiet) + strvec_push(&child.args, "--no-progress"); + + strvec_pushf(&child.args, "--batch-size=%"PRIuMAX, + (uintmax_t)get_auto_pack_size()); + + close_object_store(the_repository->objects); + + if (run_command(&child)) + return error(_("'git multi-pack-index repack' failed")); + + return 0; +} + +static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts) +{ + prepare_repo_settings(the_repository); + if (!the_repository->settings.core_multi_pack_index) { + warning(_("skipping incremental-repack task because core.multiPackIndex is disabled")); + return 0; + } + + if (multi_pack_index_write(opts)) + return 1; + if (multi_pack_index_expire(opts)) + return 1; + if (multi_pack_index_repack(opts)) + return 1; + return 0; +} + typedef int maintenance_task_fn(struct maintenance_run_opts *opts); /* @@ -849,11 +1206,16 @@ struct maintenance_task { maintenance_auto_fn *auto_condition; unsigned enabled:1; + enum schedule_priority schedule; + /* -1 if not selected. */ int selected_order; }; enum maintenance_task_label { + TASK_PREFETCH, + TASK_LOOSE_OBJECTS, + TASK_INCREMENTAL_REPACK, TASK_GC, TASK_COMMIT_GRAPH, @@ -862,6 +1224,20 @@ enum maintenance_task_label { }; static struct maintenance_task tasks[] = { + [TASK_PREFETCH] = { + "prefetch", + maintenance_task_prefetch, + }, + [TASK_LOOSE_OBJECTS] = { + "loose-objects", + maintenance_task_loose_objects, + loose_object_auto_condition, + }, + [TASK_INCREMENTAL_REPACK] = { + "incremental-repack", + maintenance_task_incremental_repack, + incremental_repack_auto_condition, + }, [TASK_GC] = { "gc", maintenance_task_gc, @@ -877,10 +1253,8 @@ static struct maintenance_task tasks[] = { static int compare_tasks_by_selection(const void *a_, const void *b_) { - const struct maintenance_task *a, *b; - - a = (const struct maintenance_task *)&a_; - b = (const struct maintenance_task *)&b_; + const struct maintenance_task *a = a_; + const struct maintenance_task *b = b_; return b->selected_order - a->selected_order; } @@ -927,6 +1301,9 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) !tasks[i].auto_condition())) continue; + if (opts->schedule && tasks[i].schedule < opts->schedule) + continue; + trace2_region_enter("maintenance", tasks[i].name, r); if (tasks[i].fn(opts)) { error(_("task '%s' failed"), tasks[i].name); @@ -939,21 +1316,54 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) return result; } -static void initialize_task_config(void) +static void initialize_maintenance_strategy(void) +{ + char *config_str; + + if (git_config_get_string("maintenance.strategy", &config_str)) + return; + + if (!strcasecmp(config_str, "incremental")) { + tasks[TASK_GC].schedule = SCHEDULE_NONE; + tasks[TASK_COMMIT_GRAPH].enabled = 1; + tasks[TASK_COMMIT_GRAPH].schedule = SCHEDULE_HOURLY; + tasks[TASK_PREFETCH].enabled = 1; + tasks[TASK_PREFETCH].schedule = SCHEDULE_HOURLY; + tasks[TASK_INCREMENTAL_REPACK].enabled = 1; + tasks[TASK_INCREMENTAL_REPACK].schedule = SCHEDULE_DAILY; + tasks[TASK_LOOSE_OBJECTS].enabled = 1; + tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY; + } +} + +static void initialize_task_config(int schedule) { int i; struct strbuf config_name = STRBUF_INIT; gc_config(); + if (schedule) + initialize_maintenance_strategy(); + for (i = 0; i < TASK__COUNT; i++) { int config_value; + char *config_str; - strbuf_setlen(&config_name, 0); + strbuf_reset(&config_name); strbuf_addf(&config_name, "maintenance.%s.enabled", tasks[i].name); if (!git_config_get_bool(config_name.buf, &config_value)) tasks[i].enabled = config_value; + + strbuf_reset(&config_name); + strbuf_addf(&config_name, "maintenance.%s.schedule", + tasks[i].name); + + if (!git_config_get_string(config_name.buf, &config_str)) { + tasks[i].schedule = parse_schedule(config_str); + free(config_str); + } } strbuf_release(&config_name); @@ -997,6 +1407,9 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, N_("run tasks based on the state of the repository")), + OPT_CALLBACK(0, "schedule", &opts.schedule, N_("frequency"), + N_("run tasks based on frequency"), + maintenance_opt_schedule), OPT_BOOL(0, "quiet", &opts.quiet, N_("do not report progress or other information over stderr")), OPT_CALLBACK_F(0, "task", NULL, N_("task"), @@ -1007,7 +1420,6 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) memset(&opts, 0, sizeof(opts)); opts.quiet = !isatty(2); - initialize_task_config(); for (i = 0; i < TASK__COUNT; i++) tasks[i].selected_order = -1; @@ -1017,13 +1429,189 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) builtin_maintenance_run_usage, PARSE_OPT_STOP_AT_NON_OPTION); + if (opts.auto_flag && opts.schedule) + die(_("use at most one of --auto and --schedule=<frequency>")); + + initialize_task_config(opts.schedule); + if (argc != 0) usage_with_options(builtin_maintenance_run_usage, builtin_maintenance_run_options); return maintenance_run_tasks(&opts); } -static const char builtin_maintenance_usage[] = N_("git maintenance run [<options>]"); +static int maintenance_register(void) +{ + char *config_value; + struct child_process config_set = CHILD_PROCESS_INIT; + struct child_process config_get = CHILD_PROCESS_INIT; + + /* Disable foreground maintenance */ + git_config_set("maintenance.auto", "false"); + + /* Set maintenance strategy, if unset */ + if (!git_config_get_string("maintenance.strategy", &config_value)) + free(config_value); + else + git_config_set("maintenance.strategy", "incremental"); + + config_get.git_cmd = 1; + strvec_pushl(&config_get.args, "config", "--global", "--get", + "--fixed-value", "maintenance.repo", + the_repository->worktree ? the_repository->worktree + : the_repository->gitdir, + NULL); + config_get.out = -1; + + if (start_command(&config_get)) + return error(_("failed to run 'git config'")); + + /* We already have this value in our config! */ + if (!finish_command(&config_get)) + return 0; + + config_set.git_cmd = 1; + strvec_pushl(&config_set.args, "config", "--add", "--global", "maintenance.repo", + the_repository->worktree ? the_repository->worktree + : the_repository->gitdir, + NULL); + + return run_command(&config_set); +} + +static int maintenance_unregister(void) +{ + struct child_process config_unset = CHILD_PROCESS_INIT; + + config_unset.git_cmd = 1; + strvec_pushl(&config_unset.args, "config", "--global", "--unset", + "--fixed-value", "maintenance.repo", + the_repository->worktree ? the_repository->worktree + : the_repository->gitdir, + NULL); + + return run_command(&config_unset); +} + +#define BEGIN_LINE "# BEGIN GIT MAINTENANCE SCHEDULE" +#define END_LINE "# END GIT MAINTENANCE SCHEDULE" + +static int update_background_schedule(int run_maintenance) +{ + int result = 0; + int in_old_region = 0; + struct child_process crontab_list = CHILD_PROCESS_INIT; + struct child_process crontab_edit = CHILD_PROCESS_INIT; + FILE *cron_list, *cron_in; + const char *crontab_name; + struct strbuf line = STRBUF_INIT; + struct lock_file lk; + char *lock_path = xstrfmt("%s/schedule", the_repository->objects->odb->path); + + if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) + return error(_("another process is scheduling background maintenance")); + + crontab_name = getenv("GIT_TEST_CRONTAB"); + if (!crontab_name) + crontab_name = "crontab"; + + strvec_split(&crontab_list.args, crontab_name); + strvec_push(&crontab_list.args, "-l"); + crontab_list.in = -1; + crontab_list.out = dup(lk.tempfile->fd); + crontab_list.git_cmd = 0; + + if (start_command(&crontab_list)) { + result = error(_("failed to run 'crontab -l'; your system might not support 'cron'")); + goto cleanup; + } + + /* Ignore exit code, as an empty crontab will return error. */ + finish_command(&crontab_list); + + /* + * Read from the .lock file, filtering out the old + * schedule while appending the new schedule. + */ + cron_list = fdopen(lk.tempfile->fd, "r"); + rewind(cron_list); + + strvec_split(&crontab_edit.args, crontab_name); + crontab_edit.in = -1; + crontab_edit.git_cmd = 0; + + if (start_command(&crontab_edit)) { + result = error(_("failed to run 'crontab'; your system might not support 'cron'")); + goto cleanup; + } + + cron_in = fdopen(crontab_edit.in, "w"); + if (!cron_in) { + result = error(_("failed to open stdin of 'crontab'")); + goto done_editing; + } + + while (!strbuf_getline_lf(&line, cron_list)) { + if (!in_old_region && !strcmp(line.buf, BEGIN_LINE)) + in_old_region = 1; + else if (in_old_region && !strcmp(line.buf, END_LINE)) + in_old_region = 0; + else if (!in_old_region) + fprintf(cron_in, "%s\n", line.buf); + } + + if (run_maintenance) { + struct strbuf line_format = STRBUF_INIT; + const char *exec_path = git_exec_path(); + + fprintf(cron_in, "%s\n", BEGIN_LINE); + fprintf(cron_in, + "# The following schedule was created by Git\n"); + fprintf(cron_in, "# Any edits made in this region might be\n"); + fprintf(cron_in, + "# replaced in the future by a Git command.\n\n"); + + strbuf_addf(&line_format, + "%%s %%s * * %%s \"%s/git\" --exec-path=\"%s\" for-each-repo --config=maintenance.repo maintenance run --schedule=%%s\n", + exec_path, exec_path); + fprintf(cron_in, line_format.buf, "0", "1-23", "*", "hourly"); + fprintf(cron_in, line_format.buf, "0", "0", "1-6", "daily"); + fprintf(cron_in, line_format.buf, "0", "0", "0", "weekly"); + strbuf_release(&line_format); + + fprintf(cron_in, "\n%s\n", END_LINE); + } + + fflush(cron_in); + fclose(cron_in); + close(crontab_edit.in); + +done_editing: + if (finish_command(&crontab_edit)) { + result = error(_("'crontab' died")); + goto cleanup; + } + fclose(cron_list); + +cleanup: + rollback_lock_file(&lk); + return result; +} + +static int maintenance_start(void) +{ + if (maintenance_register()) + warning(_("failed to add repo to global config")); + + return update_background_schedule(1); +} + +static int maintenance_stop(void) +{ + return update_background_schedule(0); +} + +static const char builtin_maintenance_usage[] = N_("git maintenance <subcommand> [<options>]"); int cmd_maintenance(int argc, const char **argv, const char *prefix) { @@ -1033,6 +1621,14 @@ int cmd_maintenance(int argc, const char **argv, const char *prefix) if (!strcmp(argv[1], "run")) return maintenance_run(argc - 1, argv + 1, prefix); + if (!strcmp(argv[1], "start")) + return maintenance_start(); + if (!strcmp(argv[1], "stop")) + return maintenance_stop(); + if (!strcmp(argv[1], "register")) + return maintenance_register(); + if (!strcmp(argv[1], "unregister")) + return maintenance_unregister(); die(_("invalid subcommand: %s"), argv[1]); } |