From 2057d75038541cd16debb1c55f3f897fd244965c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:42 +0000 Subject: maintenance: create basic maintenance runner The 'gc' builtin is our current entrypoint for automatically maintaining a repository. This one tool does many operations, such as repacking the repository, packing refs, and rewriting the commit-graph file. The name implies it performs "garbage collection" which means several different things, and some users may not want to use this operation that rewrites the entire object database. Create a new 'maintenance' builtin that will become a more general- purpose command. To start, it will only support the 'run' subcommand, but will later expand to add subcommands for scheduling maintenance in the background. For now, the 'maintenance' builtin is a thin shim over the 'gc' builtin. In fact, the only option is the '--auto' toggle, which is handed directly to the 'gc' builtin. The current change is isolated to this simple operation to prevent more interesting logic from being lost in all of the boilerplate of adding a new builtin. Use existing builtin/gc.c file because we want to share code between the two builtins. It is possible that we will have 'maintenance' replace the 'gc' builtin entirely at some point, leaving 'git gc' as an alias for some specific arguments to 'git maintenance run'. Create a new test_subcommand helper that allows us to test if a certain subcommand was run. It requires storing the GIT_TRACE2_EVENT logs in a file. A negation mode is available that will be used in later tests. Helped-by: Jonathan Nieder Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index aafa0946f5..ec064e8686 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -699,3 +699,61 @@ int cmd_gc(int argc, const char **argv, const char *prefix) return 0; } + +static const char * const builtin_maintenance_run_usage[] = { + N_("git maintenance run [--auto]"), + NULL +}; + +struct maintenance_run_opts { + int auto_flag; +}; + +static int maintenance_task_gc(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_push(&child.args, "gc"); + + if (opts->auto_flag) + strvec_push(&child.args, "--auto"); + + close_object_store(the_repository->objects); + return run_command(&child); +} + +static int maintenance_run(int argc, const char **argv, const char *prefix) +{ + struct maintenance_run_opts opts; + struct option builtin_maintenance_run_options[] = { + OPT_BOOL(0, "auto", &opts.auto_flag, + N_("run tasks based on the state of the repository")), + OPT_END() + }; + memset(&opts, 0, sizeof(opts)); + + argc = parse_options(argc, argv, prefix, + builtin_maintenance_run_options, + builtin_maintenance_run_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (argc != 0) + usage_with_options(builtin_maintenance_run_usage, + builtin_maintenance_run_options); + return maintenance_task_gc(&opts); +} + +static const char builtin_maintenance_usage[] = N_("git maintenance run []"); + +int cmd_maintenance(int argc, const char **argv, const char *prefix) +{ + if (argc < 2 || + (argc == 2 && !strcmp(argv[1], "-h"))) + usage(builtin_maintenance_usage); + + if (!strcmp(argv[1], "run")) + return maintenance_run(argc - 1, argv + 1, prefix); + + die(_("invalid subcommand: %s"), argv[1]); +} -- cgit v1.2.3 From 3ddaad0e06035a752c9010dcd5094b37262080fb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:43 +0000 Subject: maintenance: add --quiet option Maintenance activities are commonly used as steps in larger scripts. Providing a '--quiet' option allows those scripts to be less noisy when run on a terminal window. Turn this mode on by default when stderr is not a terminal. Pipe the option to the 'git gc' child process. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index ec064e8686..bacce0c747 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -701,12 +701,13 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } static const char * const builtin_maintenance_run_usage[] = { - N_("git maintenance run [--auto]"), + N_("git maintenance run [--auto] [--[no-]quiet]"), NULL }; struct maintenance_run_opts { int auto_flag; + int quiet; }; static int maintenance_task_gc(struct maintenance_run_opts *opts) @@ -718,6 +719,10 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts) if (opts->auto_flag) strvec_push(&child.args, "--auto"); + if (opts->quiet) + strvec_push(&child.args, "--quiet"); + else + strvec_push(&child.args, "--no-quiet"); close_object_store(the_repository->objects); return run_command(&child); @@ -729,10 +734,14 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, N_("run tasks based on the state of the repository")), + OPT_BOOL(0, "quiet", &opts.quiet, + N_("do not report progress or other information over stderr")), OPT_END() }; memset(&opts, 0, sizeof(opts)); + opts.quiet = !isatty(2); + argc = parse_options(argc, argv, prefix, builtin_maintenance_run_options, builtin_maintenance_run_usage, -- cgit v1.2.3 From a95ce124305adcc4980241b8877e06db1d6ed411 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:44 +0000 Subject: maintenance: replace run_auto_gc() The run_auto_gc() method is used in several places to trigger a check for repo maintenance after some Git commands, such as 'git commit' or 'git fetch'. To allow for extra customization of this maintenance activity, replace the 'git gc --auto [--quiet]' call with one to 'git maintenance run --auto [--quiet]'. As we extend the maintenance builtin with other steps, users will be able to select different maintenance activities. Rename run_auto_gc() to run_auto_maintenance() to be clearer what is happening on this call, and to expose all callers in the current diff. Rewrite the method to use a struct child_process to simplify the calls slightly. Since 'git fetch' already allows disabling the 'git gc --auto' subprocess, add an equivalent option with a different name to be more descriptive of the new behavior: '--[no-]maintenance'. Update the documentation to include these options at the same time. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/am.c | 2 +- builtin/commit.c | 2 +- builtin/fetch.c | 6 ++++-- builtin/merge.c | 2 +- builtin/rebase.c | 4 ++-- 5 files changed, 9 insertions(+), 7 deletions(-) (limited to 'builtin') diff --git a/builtin/am.c b/builtin/am.c index dd4e6c2d9b..68e9d17379 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -1795,7 +1795,7 @@ next: if (!state->rebasing) { am_destroy(state); close_object_store(the_repository->objects); - run_auto_gc(state->quiet); + run_auto_maintenance(state->quiet); } } diff --git a/builtin/commit.c b/builtin/commit.c index 69ac78d5e5..f9b0a0c05d 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1702,7 +1702,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) git_test_write_commit_graph_or_die(); repo_rerere(the_repository, 0); - run_auto_gc(quiet); + run_auto_maintenance(quiet); run_commit_hook(use_editor, get_index_file(), "post-commit", NULL); if (amend && !no_post_rewrite) { commit_post_rewrite(the_repository, current_head, &oid); diff --git a/builtin/fetch.c b/builtin/fetch.c index 2eb8d6a5a5..cb38e6f5ec 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -199,8 +199,10 @@ static struct option builtin_fetch_options[] = { OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"), N_("report that we have only objects reachable from this object")), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + OPT_BOOL(0, "auto-maintenance", &enable_auto_gc, + N_("run 'maintenance --auto' after fetching")), OPT_BOOL(0, "auto-gc", &enable_auto_gc, - N_("run 'gc --auto' after fetching")), + N_("run 'maintenance --auto' after fetching")), OPT_BOOL(0, "show-forced-updates", &fetch_show_forced_updates, N_("check for forced-updates on all updated branches")), OPT_BOOL(0, "write-commit-graph", &fetch_write_commit_graph, @@ -1891,7 +1893,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) close_object_store(the_repository->objects); if (enable_auto_gc) - run_auto_gc(verbosity < 0); + run_auto_maintenance(verbosity < 0); return result; } diff --git a/builtin/merge.c b/builtin/merge.c index 74829a838e..8f31bfab56 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -456,7 +456,7 @@ static void finish(struct commit *head_commit, * user should see them. */ close_object_store(the_repository->objects); - run_auto_gc(verbosity < 0); + run_auto_maintenance(verbosity < 0); } } if (new_head && show_diffstat) { diff --git a/builtin/rebase.c b/builtin/rebase.c index dadb52fa92..9ffba9009d 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -728,10 +728,10 @@ static int finish_rebase(struct rebase_options *opts) apply_autostash(state_dir_path("autostash", opts)); close_object_store(the_repository->objects); /* - * We ignore errors in 'gc --auto', since the + * We ignore errors in 'git maintenance run --auto', since the * user should see them. */ - run_auto_gc(!(opts->flags & (REBASE_NO_QUIET|REBASE_VERBOSE))); + run_auto_maintenance(!(opts->flags & (REBASE_NO_QUIET|REBASE_VERBOSE))); if (opts->type == REBASE_MERGE) { struct replay_opts replay = REPLAY_OPTS_INIT; -- cgit v1.2.3 From 3103e9848f7b7534eac14c0521d7b49b32466b70 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:45 +0000 Subject: maintenance: initialize task array In anticipation of implementing multiple maintenance tasks inside the 'maintenance' builtin, use a list of structs to describe the work to be done. The struct maintenance_task stores the name of the task (as given by a future command-line argument) along with a function pointer to its implementation and a boolean for whether the step is enabled. A list these structs are initialized with the full list of implemented tasks along with a default order. For now, this list only contains the "gc" task. This task is also the only task enabled by default. The run subcommand will return a nonzero exit code if any task fails. However, it will attempt all tasks in its loop before returning with the failure. Also each failed task will print an error message. Helped-by: Taylor Blau Helped-by: Junio C Hamano Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index bacce0c747..904fb2d9aa 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -728,6 +728,47 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts) return run_command(&child); } +typedef int maintenance_task_fn(struct maintenance_run_opts *opts); + +struct maintenance_task { + const char *name; + maintenance_task_fn *fn; + unsigned enabled:1; +}; + +enum maintenance_task_label { + TASK_GC, + + /* Leave as final value */ + TASK__COUNT +}; + +static struct maintenance_task tasks[] = { + [TASK_GC] = { + "gc", + maintenance_task_gc, + 1, + }, +}; + +static int maintenance_run_tasks(struct maintenance_run_opts *opts) +{ + int i; + int result = 0; + + for (i = 0; i < TASK__COUNT; i++) { + if (!tasks[i].enabled) + continue; + + if (tasks[i].fn(opts)) { + error(_("task '%s' failed"), tasks[i].name); + result = 1; + } + } + + return result; +} + static int maintenance_run(int argc, const char **argv, const char *prefix) { struct maintenance_run_opts opts; @@ -750,7 +791,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) if (argc != 0) usage_with_options(builtin_maintenance_run_usage, builtin_maintenance_run_options); - return maintenance_task_gc(&opts); + return maintenance_run_tasks(&opts); } static const char builtin_maintenance_usage[] = N_("git maintenance run []"); -- cgit v1.2.3 From 663b2b1b90bf76275044824ddeca96aaec240f09 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:46 +0000 Subject: maintenance: add commit-graph task The first new task in the 'git maintenance' builtin is the 'commit-graph' task. This updates the commit-graph file incrementally with the command git commit-graph write --reachable --split By writing an incremental commit-graph file using the "--split" option we minimize the disruption from this operation. The default behavior is to merge layers until the new "top" layer is less than half the size of the layer below. This provides quick writes most of the time, with the longer writes following a power law distribution. Most importantly, concurrent Git processes only look at the commit-graph-chain file for a very short amount of time, so they will verly likely not be holding a handle to the file when we try to replace it. (This only matters on Windows.) If a concurrent process reads the old commit-graph-chain file, but our job expires some of the .graph files before they can be read, then those processes will see a warning message (but not fail). This could be avoided by a future update to use the --expire-time argument when writing the commit-graph. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 904fb2d9aa..86b807a008 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -710,6 +710,31 @@ struct maintenance_run_opts { int quiet; }; +static int run_write_commit_graph(struct maintenance_run_opts *opts) +{ + struct child_process child = CHILD_PROCESS_INIT; + + child.git_cmd = 1; + strvec_pushl(&child.args, "commit-graph", "write", + "--split", "--reachable", NULL); + + if (opts->quiet) + strvec_push(&child.args, "--no-progress"); + + return !!run_command(&child); +} + +static int maintenance_task_commit_graph(struct maintenance_run_opts *opts) +{ + close_object_store(the_repository->objects); + if (run_write_commit_graph(opts)) { + error(_("failed to write commit-graph")); + return 1; + } + + return 0; +} + static int maintenance_task_gc(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -738,6 +763,7 @@ struct maintenance_task { enum maintenance_task_label { TASK_GC, + TASK_COMMIT_GRAPH, /* Leave as final value */ TASK__COUNT @@ -749,6 +775,10 @@ static struct maintenance_task tasks[] = { maintenance_task_gc, 1, }, + [TASK_COMMIT_GRAPH] = { + "commit-graph", + maintenance_task_commit_graph, + }, }; static int maintenance_run_tasks(struct maintenance_run_opts *opts) -- cgit v1.2.3 From 090511bc0b73bf3054f11df6f275ad3a3abbf34f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:47 +0000 Subject: maintenance: add --task option A user may want to only run certain maintenance tasks in a certain order. Add the --task= option, which allows a user to specify an ordered list of tasks to run. These cannot be run multiple times, however. Here is where our array of maintenance_task pointers becomes critical. We can sort the array of pointers based on the task order, but we do not want to move the struct data itself in order to preserve the hashmap references. We use the hashmap to match the --task= arguments into the task struct data. Keep in mind that the 'enabled' member of the maintenance_task struct is a placeholder for a future 'maintenance..enabled' config option. Thus, we use the 'enabled' member to specify which tasks are run when the user does not specify any --task= arguments. The 'enabled' member should be ignored if --task= appears. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 3 deletions(-) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 86b807a008..00fff59bdb 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -701,7 +701,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix) } static const char * const builtin_maintenance_run_usage[] = { - N_("git maintenance run [--auto] [--[no-]quiet]"), + N_("git maintenance run [--auto] [--[no-]quiet] [--task=]"), NULL }; @@ -759,6 +759,9 @@ struct maintenance_task { const char *name; maintenance_task_fn *fn; unsigned enabled:1; + + /* -1 if not selected. */ + int selected_order; }; enum maintenance_task_label { @@ -781,13 +784,32 @@ static struct maintenance_task tasks[] = { }, }; +static int compare_tasks_by_selection(const void *a_, const void *b_) +{ + const struct maintenance_task *a, *b; + + a = (const struct maintenance_task *)&a_; + b = (const struct maintenance_task *)&b_; + + return b->selected_order - a->selected_order; +} + static int maintenance_run_tasks(struct maintenance_run_opts *opts) { - int i; + int i, found_selected = 0; int result = 0; + for (i = 0; !found_selected && i < TASK__COUNT; i++) + found_selected = tasks[i].selected_order >= 0; + + if (found_selected) + QSORT(tasks, TASK__COUNT, compare_tasks_by_selection); + for (i = 0; i < TASK__COUNT; i++) { - if (!tasks[i].enabled) + if (found_selected && tasks[i].selected_order < 0) + continue; + + if (!found_selected && !tasks[i].enabled) continue; if (tasks[i].fn(opts)) { @@ -799,20 +821,58 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) return result; } +static int task_option_parse(const struct option *opt, + const char *arg, int unset) +{ + int i, num_selected = 0; + struct maintenance_task *task = NULL; + + BUG_ON_OPT_NEG(unset); + + for (i = 0; i < TASK__COUNT; i++) { + if (tasks[i].selected_order >= 0) + num_selected++; + if (!strcasecmp(tasks[i].name, arg)) { + task = &tasks[i]; + } + } + + if (!task) { + error(_("'%s' is not a valid task"), arg); + return 1; + } + + if (task->selected_order >= 0) { + error(_("task '%s' cannot be selected multiple times"), arg); + return 1; + } + + task->selected_order = num_selected + 1; + + return 0; +} + static int maintenance_run(int argc, const char **argv, const char *prefix) { + int i; struct maintenance_run_opts opts; struct option builtin_maintenance_run_options[] = { OPT_BOOL(0, "auto", &opts.auto_flag, N_("run tasks based on the state of the repository")), OPT_BOOL(0, "quiet", &opts.quiet, N_("do not report progress or other information over stderr")), + OPT_CALLBACK_F(0, "task", NULL, N_("task"), + N_("run a specific task"), + PARSE_OPT_NONEG, task_option_parse), OPT_END() }; memset(&opts, 0, sizeof(opts)); opts.quiet = !isatty(2); + for (i = 0; i < TASK__COUNT; i++) + tasks[i].selected_order = -1; + argc = parse_options(argc, argv, prefix, builtin_maintenance_run_options, builtin_maintenance_run_usage, -- cgit v1.2.3 From d7514f6ed57d20bcc9dcfb43016b95dba82ba790 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:48 +0000 Subject: maintenance: take a lock on the objects directory Performing maintenance on a Git repository involves writing data to the .git directory, which is not safe to do with multiple writers attempting the same operation. Ensure that only one 'git maintenance' process is running at a time by holding a file-based lock. Simply the presence of the .git/maintenance.lock file will prevent future maintenance. This lock is never committed, since it does not represent meaningful data. Instead, it is only a placeholder. If the lock file already exists, then no maintenance tasks are attempted. This will become very important later when we implement the 'prefetch' task, as this is our stop-gap from creating a recursive process loop between 'git fetch' and 'git maintenance run --auto'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 00fff59bdb..7ba9c6f7c9 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -798,6 +798,25 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) { int i, found_selected = 0; int result = 0; + struct lock_file lk; + struct repository *r = the_repository; + char *lock_path = xstrfmt("%s/maintenance", r->objects->odb->path); + + if (hold_lock_file_for_update(&lk, lock_path, LOCK_NO_DEREF) < 0) { + /* + * Another maintenance command is running. + * + * If --auto was provided, then it is likely due to a + * recursive process stack. Do not report an error in + * that case. + */ + if (!opts->auto_flag && !opts->quiet) + warning(_("lock file '%s' exists, skipping maintenance"), + lock_path); + free(lock_path); + return 0; + } + free(lock_path); for (i = 0; !found_selected && i < TASK__COUNT; i++) found_selected = tasks[i].selected_order >= 0; @@ -818,6 +837,7 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) } } + rollback_lock_file(&lk); return result; } -- cgit v1.2.3 From 65d655b52df3d4db0b4980f60106ed9785d025f8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:49 +0000 Subject: maintenance: create maintenance..enabled config Currently, a normal run of "git maintenance run" will only run the 'gc' task, as it is the only one enabled. This is mostly for backwards- compatible reasons since "git maintenance run --auto" commands replaced previous "git gc --auto" commands after some Git processes. Users could manually run specific maintenance tasks by calling "git maintenance run --task=" directly. Allow users to customize which steps are run automatically using config. The 'maintenance..enabled' option then can turn on these other tasks (or turn off the 'gc' task). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 7ba9c6f7c9..55a3d836f0 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -841,6 +841,24 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) return result; } +static void initialize_task_config(void) +{ + int i; + struct strbuf config_name = STRBUF_INIT; + for (i = 0; i < TASK__COUNT; i++) { + int config_value; + + strbuf_setlen(&config_name, 0); + strbuf_addf(&config_name, "maintenance.%s.enabled", + tasks[i].name); + + if (!git_config_get_bool(config_name.buf, &config_value)) + tasks[i].enabled = config_value; + } + + strbuf_release(&config_name); +} + static int task_option_parse(const struct option *opt, const char *arg, int unset) { @@ -889,6 +907,7 @@ static int maintenance_run(int argc, const char **argv, const char *prefix) memset(&opts, 0, sizeof(opts)); opts.quiet = !isatty(2); + initialize_task_config(); for (i = 0; i < TASK__COUNT; i++) tasks[i].selected_order = -1; -- cgit v1.2.3 From 916d0626c202c18e6d615abb69ba1f15b335c3ea Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:50 +0000 Subject: maintenance: use pointers to check --auto The 'git maintenance run' command has an '--auto' option. This is used by other Git commands such as 'git commit' or 'git fetch' to check if maintenance should be run after adding data to the repository. Previously, this --auto option was only used to add the argument to the 'git gc' command as part of the 'gc' task. We will be expanding the other tasks to perform a check to see if they should do work as part of the --auto flag, when they are enabled by config. First, update the 'gc' task to perform the auto check inside the maintenance process. This prevents running an extra 'git gc --auto' command when not needed. It also shows a model for other tasks. Second, use the 'auto_condition' function pointer as a signal for whether we enable the maintenance task under '--auto'. For instance, we do not want to enable the 'fetch' task in '--auto' mode, so that function pointer will remain NULL. Now that we are not automatically calling 'git gc', a test in t5514-fetch-multiple.sh must be changed to watch for 'git maintenance' instead. We continue to pass the '--auto' option to the 'git gc' command when necessary, because of the gc.autoDetach config option changes behavior. Likely, we will want to absorb the daemonizing behavior implied by gc.autoDetach as a maintenance.autoDetach config option. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 55a3d836f0..13c24bca7d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -755,9 +755,17 @@ static int maintenance_task_gc(struct maintenance_run_opts *opts) typedef int maintenance_task_fn(struct maintenance_run_opts *opts); +/* + * An auto condition function returns 1 if the task should run + * and 0 if the task should NOT run. See needs_to_gc() for an + * example. + */ +typedef int maintenance_auto_fn(void); + struct maintenance_task { const char *name; maintenance_task_fn *fn; + maintenance_auto_fn *auto_condition; unsigned enabled:1; /* -1 if not selected. */ @@ -776,6 +784,7 @@ static struct maintenance_task tasks[] = { [TASK_GC] = { "gc", maintenance_task_gc, + need_to_gc, 1, }, [TASK_COMMIT_GRAPH] = { @@ -831,6 +840,11 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) if (!found_selected && !tasks[i].enabled) continue; + if (opts->auto_flag && + (!tasks[i].auto_condition || + !tasks[i].auto_condition())) + continue; + if (tasks[i].fn(opts)) { error(_("task '%s' failed"), tasks[i].name); result = 1; @@ -845,6 +859,8 @@ static void initialize_task_config(void) { int i; struct strbuf config_name = STRBUF_INIT; + gc_config(); + for (i = 0; i < TASK__COUNT; i++) { int config_value; -- cgit v1.2.3 From 4ddc79b2dac18189e7ff18e99a6fabdd161f866c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:51 +0000 Subject: maintenance: add auto condition for commit-graph task Instead of writing a new commit-graph in every 'git maintenance run --auto' process (when maintenance.commit-graph.enalbed is configured to be true), only write when there are "enough" commits not in a commit-graph file. This count is controlled by the maintenance.commit-graph.auto config option. To compute the count, use a depth-first search starting at each ref, and leaving markers using the SEEN flag. If this count reaches the limit, then terminate early and start the task. Otherwise, this operation will peel every ref and parse the commit it points to. If these are all in the commit-graph, then this is typically a very fast operation. Users with many refs might feel a slow-down, and hence could consider updating their limit to be very small. A negative value will force the step to run every time. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 13c24bca7d..4db853c561 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -28,6 +28,7 @@ #include "blob.h" #include "tree.h" #include "promisor-remote.h" +#include "refs.h" #define FAILED_RUN "failed to run %s" @@ -710,6 +711,86 @@ struct maintenance_run_opts { int quiet; }; +/* Remember to update object flag allocation in object.h */ +#define SEEN (1u<<0) + +struct cg_auto_data { + int num_not_in_graph; + int limit; +}; + +static int dfs_on_ref(const char *refname, + const struct object_id *oid, int flags, + void *cb_data) +{ + struct cg_auto_data *data = (struct cg_auto_data *)cb_data; + int result = 0; + struct object_id peeled; + struct commit_list *stack = NULL; + struct commit *commit; + + if (!peel_ref(refname, &peeled)) + oid = &peeled; + if (oid_object_info(the_repository, oid, NULL) != OBJ_COMMIT) + return 0; + + commit = lookup_commit(the_repository, oid); + if (!commit) + return 0; + if (parse_commit(commit)) + return 0; + + commit_list_append(commit, &stack); + + while (!result && stack) { + struct commit_list *parent; + + commit = pop_commit(&stack); + + for (parent = commit->parents; parent; parent = parent->next) { + if (parse_commit(parent->item) || + commit_graph_position(parent->item) != COMMIT_NOT_FROM_GRAPH || + parent->item->object.flags & SEEN) + continue; + + parent->item->object.flags |= SEEN; + data->num_not_in_graph++; + + if (data->num_not_in_graph >= data->limit) { + result = 1; + break; + } + + commit_list_append(parent->item, &stack); + } + } + + free_commit_list(stack); + return result; +} + +static int should_write_commit_graph(void) +{ + int result; + struct cg_auto_data data; + + data.num_not_in_graph = 0; + data.limit = 100; + git_config_get_int("maintenance.commit-graph.auto", + &data.limit); + + if (!data.limit) + return 0; + if (data.limit < 0) + return 1; + + result = for_each_ref(dfs_on_ref, &data); + + clear_commit_marks_all(SEEN); + + return result; +} + static int run_write_commit_graph(struct maintenance_run_opts *opts) { struct child_process child = CHILD_PROCESS_INIT; @@ -790,6 +871,7 @@ static struct maintenance_task tasks[] = { [TASK_COMMIT_GRAPH] = { "commit-graph", maintenance_task_commit_graph, + should_write_commit_graph, }, }; -- cgit v1.2.3 From 25914c4fdeefd99b06e134496dfb9bbb58a5c417 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 17 Sep 2020 18:11:52 +0000 Subject: maintenance: add trace2 regions for task execution Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- builtin/gc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'builtin') diff --git a/builtin/gc.c b/builtin/gc.c index 4db853c561..090959350e 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -927,10 +927,12 @@ static int maintenance_run_tasks(struct maintenance_run_opts *opts) !tasks[i].auto_condition())) continue; + trace2_region_enter("maintenance", tasks[i].name, r); if (tasks[i].fn(opts)) { error(_("task '%s' failed"), tasks[i].name); result = 1; } + trace2_region_leave("maintenance", tasks[i].name, r); } rollback_lock_file(&lk); -- cgit v1.2.3