diff options
author | Josh Steadmon <steadmon@google.com> | 2022-02-04 21:00:49 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2022-02-09 15:38:36 -0800 |
commit | f05da2b48b48a46db65fc768b3ffecaf996dd655 (patch) | |
tree | 11b0ef5fdb0e46d0cf82ca8be204775501dc9434 /builtin/clone.c | |
parent | Merge branch 'js/branch-track-inherit' (diff) | |
download | tgif-f05da2b48b48a46db65fc768b3ffecaf996dd655.tar.xz |
clone, submodule: pass partial clone filters to submodules
When cloning a repo with a --filter and with --recurse-submodules
enabled, the partial clone filter only applies to the top-level repo.
This can lead to unexpected bandwidth and disk usage for projects which
include large submodules. For example, a user might wish to make a
partial clone of Gerrit and would run:
`git clone --recurse-submodules --filter=blob:limit=5k https://gerrit.googlesource.com/gerrit`.
However, only the superproject would be a partial clone; all the
submodules would have all blobs downloaded regardless of their size.
With this change, the same filter can also be applied to submodules,
meaning the expected bandwidth and disk savings apply consistently.
To avoid changing default behavior, add a new clone flag,
`--also-filter-submodules`. When this is set along with `--filter` and
`--recurse-submodules`, the filter spec is passed along to git-submodule
and git-submodule--helper, such that submodule clones also have the
filter applied.
This applies the same filter to the superproject and all submodules.
Users who need to customize the filter per-submodule would need to clone
with `--no-recurse-submodules` and then manually initialize each
submodule with the proper filter.
Applying filters to submodules should be safe thanks to Jonathan Tan's
recent work [1, 2, 3] eliminating the use of alternates as a method of
accessing submodule objects, so any submodule object access now triggers
a lazy fetch from the submodule's promisor remote if the accessed object
is missing. This patch is a reworked version of [4], which was created
prior to Jonathan Tan's work.
[1]: 8721e2e (Merge branch 'jt/partial-clone-submodule-1', 2021-07-16)
[2]: 11e5d0a (Merge branch 'jt/grep-wo-submodule-odb-as-alternate',
2021-09-20)
[3]: 162a13b (Merge branch 'jt/no-abuse-alternate-odb-for-submodules',
2021-10-25)
[4]: https://lore.kernel.org/git/52bf9d45b8e2b72ff32aa773f2415bf7b2b86da2.1563322192.git.steadmon@google.com/
Signed-off-by: Josh Steadmon <steadmon@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/clone.c')
-rw-r--r-- | builtin/clone.c | 36 |
1 files changed, 34 insertions, 2 deletions
diff --git a/builtin/clone.c b/builtin/clone.c index 727e16e0ae..fb605e4c8d 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -71,6 +71,8 @@ static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; static struct list_objects_filter_options filter_options; +static int option_filter_submodules = -1; /* unspecified */ +static int config_filter_submodules = -1; /* unspecified */ static struct string_list server_options = STRING_LIST_INIT_NODUP; static int option_remote_submodules; @@ -150,6 +152,8 @@ static struct option builtin_clone_options[] = { OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + OPT_BOOL(0, "also-filter-submodules", &option_filter_submodules, + N_("apply partial clone filters to submodules")), OPT_BOOL(0, "remote-submodules", &option_remote_submodules, N_("any cloned submodules will use their remote-tracking branch")), OPT_BOOL(0, "sparse", &option_sparse_checkout, @@ -650,7 +654,7 @@ static int git_sparse_checkout_init(const char *repo) return result; } -static int checkout(int submodule_progress) +static int checkout(int submodule_progress, int filter_submodules) { struct object_id oid; char *head; @@ -729,6 +733,10 @@ static int checkout(int submodule_progress) strvec_push(&args, "--no-fetch"); } + if (filter_submodules && filter_options.choice) + strvec_pushf(&args, "--filter=%s", + expand_list_objects_filter_spec(&filter_options)); + if (option_single_branch >= 0) strvec_push(&args, option_single_branch ? 
"--single-branch" : @@ -749,6 +757,8 @@ static int git_clone_config(const char *k, const char *v, void *cb) } if (!strcmp(k, "clone.rejectshallow")) config_reject_shallow = git_config_bool(k, v); + if (!strcmp(k, "clone.filtersubmodules")) + config_filter_submodules = git_config_bool(k, v); return git_default_config(k, v, cb); } @@ -871,6 +881,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct remote *remote; int err = 0, complete_refs_before_fetch = 1; int submodule_progress; + int filter_submodules = 0; struct transport_ls_refs_options transport_ls_refs_options = TRANSPORT_LS_REFS_OPTIONS_INIT; @@ -1067,6 +1078,27 @@ int cmd_clone(int argc, const char **argv, const char *prefix) reject_shallow = option_reject_shallow; /* + * If option_filter_submodules is specified from CLI option, + * ignore config_filter_submodules from git_clone_config. + */ + if (config_filter_submodules != -1) + filter_submodules = config_filter_submodules; + if (option_filter_submodules != -1) + filter_submodules = option_filter_submodules; + + /* + * Exit if the user seems to be doing something silly with submodule + * filter flags (but not with filter configs, as those should be + * set-and-forget). + */ + if (option_filter_submodules > 0 && !filter_options.choice) + die(_("the option '%s' requires '%s'"), + "--also-filter-submodules", "--filter"); + if (option_filter_submodules > 0 && !option_recurse_submodules.nr) + die(_("the option '%s' requires '%s'"), + "--also-filter-submodules", "--recurse-submodules"); + + /* * apply the remote name provided by --origin only after this second * call to git_config, to ensure it overrides all config-based values. */ @@ -1299,7 +1331,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } junk_mode = JUNK_LEAVE_REPO; - err = checkout(submodule_progress); + err = checkout(submodule_progress, filter_submodules); free(remote_name); strbuf_release(&reflog_msg); |