diff options
Diffstat (limited to 'submodule.c')
-rw-r--r-- | submodule.c | 469 |
1 file changed, 319 insertions, 150 deletions
diff --git a/submodule.c b/submodule.c index c689070524..86c8f0f89d 100644 --- a/submodule.c +++ b/submodule.c @@ -22,6 +22,7 @@ #include "parse-options.h" #include "object-store.h" #include "commit-reach.h" +#include "shallow.h" static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF; static int initialized_fetch_ref_tips; @@ -167,26 +168,6 @@ void stage_updated_gitmodules(struct index_state *istate) static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP; -/* TODO: remove this function, use repo_submodule_init instead. */ -int add_submodule_odb(const char *path) -{ - struct strbuf objects_directory = STRBUF_INIT; - int ret = 0; - - ret = strbuf_git_path_submodule(&objects_directory, path, "objects/"); - if (ret) - goto done; - if (!is_directory(objects_directory.buf)) { - ret = -1; - goto done; - } - string_list_insert(&added_submodule_odb_paths, - strbuf_detach(&objects_directory, NULL)); -done: - strbuf_release(&objects_directory); - return ret; -} - void add_submodule_odb_by_path(const char *path) { string_list_insert(&added_submodule_odb_paths, xstrdup(path)); @@ -267,7 +248,9 @@ int option_parse_recurse_submodules_worktree_updater(const struct option *opt, * ie, the config looks like: "[submodule] active\n". * Since that is an invalid pathspec, we should inform the user. 
*/ -int is_submodule_active(struct repository *repo, const char *path) +int is_tree_submodule_active(struct repository *repo, + const struct object_id *treeish_name, + const char *path) { int ret = 0; char *key = NULL; @@ -275,7 +258,7 @@ int is_submodule_active(struct repository *repo, const char *path) const struct string_list *sl; const struct submodule *module; - module = submodule_from_path(repo, null_oid(), path); + module = submodule_from_path(repo, treeish_name, path); /* early return if there isn't a path->module mapping */ if (!module) @@ -317,6 +300,11 @@ int is_submodule_active(struct repository *repo, const char *path) return ret; } +int is_submodule_active(struct repository *repo, const char *path) +{ + return is_tree_submodule_active(repo, null_oid(), path); +} + int is_submodule_populated_gently(const char *path, int *return_error_code) { int ret = 0; @@ -775,19 +763,6 @@ const struct submodule *submodule_from_ce(const struct cache_entry *ce) return submodule_from_path(the_repository, null_oid(), ce->name); } -static struct oid_array *submodule_commits(struct string_list *submodules, - const char *name) -{ - struct string_list_item *item; - - item = string_list_insert(submodules, name); - if (item->util) - return (struct oid_array *) item->util; - - /* NEEDSWORK: should we have oid_array_init()? */ - item->util = xcalloc(1, sizeof(struct oid_array)); - return (struct oid_array *) item->util; -} struct collect_changed_submodules_cb_data { struct repository *repo; @@ -812,6 +787,52 @@ static const char *default_name_or_path(const char *path_or_name) return path_or_name; } +/* + * Holds relevant information for a changed submodule. Used as the .util + * member of the changed submodule name string_list_item. + * + * (super_oid, path) allows the submodule config to be read from _some_ + * .gitmodules file. 
We store this information the first time we find a + * superproject commit that points to the submodule, but this is + * arbitrary - we can choose any (super_oid, path) that matches the + * submodule's name. + * + * NEEDSWORK: Storing an arbitrary commit is undesirable because we can't + * guarantee that we're reading the commit that the user would expect. A better + * scheme would be to just fetch a submodule by its name. This requires two + * steps: + * - Create a function that behaves like repo_submodule_init(), but accepts a + * submodule name instead of treeish_name and path. This should be easy + * because repo_submodule_init() internally uses the submodule's name. + * + * - Replace most instances of 'struct submodule' (which is the .gitmodules + * config) with just the submodule name. This is OK because we expect + * submodule settings to be stored in .git/config (via "git submodule init"), + * not .gitmodules. This also lets us delete get_non_gitmodules_submodule(), + * which constructs a bogus 'struct submodule' for the sake of giving a + * placeholder name to a gitlink. + */ +struct changed_submodule_data { + /* + * The first superproject commit in the rev walk that points to + * the submodule. + */ + const struct object_id *super_oid; + /* + * Path to the submodule in the superproject commit referenced + * by 'super_oid'. + */ + char *path; + /* The submodule commits that have changed in the rev walk. 
*/ + struct oid_array new_commits; +}; + +static void changed_submodule_data_clear(struct changed_submodule_data *cs_data) +{ + oid_array_clear(&cs_data->new_commits); + free(cs_data->path); +} + static void collect_changed_submodules_cb(struct diff_queue_struct *q, struct diff_options *options, void *data) @@ -823,9 +844,10 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q, for (i = 0; i < q->nr; i++) { struct diff_filepair *p = q->queue[i]; - struct oid_array *commits; const struct submodule *submodule; const char *name; + struct string_list_item *item; + struct changed_submodule_data *cs_data; if (!S_ISGITLINK(p->two->mode)) continue; @@ -852,8 +874,16 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q, if (!name) continue; - commits = submodule_commits(changed, name); - oid_array_append(commits, &p->two->oid); + item = string_list_insert(changed, name); + if (item->util) + cs_data = item->util; + else { + item->util = xcalloc(1, sizeof(struct changed_submodule_data)); + cs_data = item->util; + cs_data->super_oid = commit_oid; + cs_data->path = xstrdup(p->two->path); + } + oid_array_append(&cs_data->new_commits, &p->two->oid); } } @@ -900,11 +930,12 @@ static void collect_changed_submodules(struct repository *r, reset_revision_walk(); } -static void free_submodules_oids(struct string_list *submodules) +static void free_submodules_data(struct string_list *submodules) { struct string_list_item *item; for_each_string_list_item(item, submodules) - oid_array_clear((struct oid_array *) item->util); + changed_submodule_data_clear(item->util); + string_list_clear(submodules, 1); } @@ -925,6 +956,7 @@ struct has_commit_data { struct repository *repo; int result; const char *path; + const struct object_id *super_oid; }; static int check_has_commit(const struct object_id *oid, void *data) @@ -933,9 +965,10 @@ static int check_has_commit(const struct object_id *oid, void *data) struct repository subrepo; enum object_type type; - 
if (repo_submodule_init(&subrepo, cb->repo, cb->path, null_oid())) { + if (repo_submodule_init(&subrepo, cb->repo, cb->path, cb->super_oid)) { cb->result = 0; - goto cleanup; + /* subrepo failed to init, so don't clean it up. */ + return 0; } type = oid_object_info(&subrepo, oid, NULL); @@ -961,21 +994,15 @@ cleanup: static int submodule_has_commits(struct repository *r, const char *path, + const struct object_id *super_oid, struct oid_array *commits) { - struct has_commit_data has_commit = { r, 1, path }; - - /* - * Perform a cheap, but incorrect check for the existence of 'commits'. - * This is done by adding the submodule's object store to the in-core - * object store, and then querying for each commit's existence. If we - * do not have the commit object anywhere, there is no chance we have - * it in the object store of the correct submodule and have it - * reachable from a ref, so we can fail early without spawning rev-list - * which is expensive. - */ - if (add_submodule_odb(path)) - return 0; + struct has_commit_data has_commit = { + .repo = r, + .result = 1, + .path = path, + .super_oid = super_oid + }; oid_array_for_each_unique(commits, check_has_commit, &has_commit); @@ -1010,7 +1037,7 @@ static int submodule_needs_pushing(struct repository *r, const char *path, struct oid_array *commits) { - if (!submodule_has_commits(r, path, commits)) + if (!submodule_has_commits(r, path, null_oid(), commits)) /* * NOTE: We do consider it safe to return "no" here. 
The * correct answer would be "We do not know" instead of @@ -1070,7 +1097,7 @@ int find_unpushed_submodules(struct repository *r, collect_changed_submodules(r, &submodules, &argv); for_each_string_list_item(name, &submodules) { - struct oid_array *commits = name->util; + struct changed_submodule_data *cs_data = name->util; const struct submodule *submodule; const char *path = NULL; @@ -1083,11 +1110,11 @@ int find_unpushed_submodules(struct repository *r, if (!path) continue; - if (submodule_needs_pushing(r, path, commits)) + if (submodule_needs_pushing(r, path, &cs_data->new_commits)) string_list_insert(needs_pushing, path); } - free_submodules_oids(&submodules); + free_submodules_data(&submodules); strvec_clear(&argv); return needs_pushing->nr; @@ -1233,14 +1260,36 @@ void check_for_new_submodule_commits(struct object_id *oid) oid_array_append(&ref_tips_after_fetch, oid); } +/* + * Returns 1 if there is at least one submodule gitdir in + * $GIT_DIR/modules and 0 otherwise. This follows + * submodule_name_to_gitdir(), which looks for submodules in + * $GIT_DIR/modules, not $GIT_COMMON_DIR. + * + * A submodule can be moved to $GIT_DIR/modules manually by running "git + * submodule absorbgitdirs", or it may be initialized there by "git + * submodule update". 
+ */ +static int repo_has_absorbed_submodules(struct repository *r) +{ + int ret; + struct strbuf buf = STRBUF_INIT; + + strbuf_repo_git_path(&buf, r, "modules/"); + ret = file_exists(buf.buf) && !is_empty_dir(buf.buf); + strbuf_release(&buf); + return ret; +} + static void calculate_changed_submodule_paths(struct repository *r, struct string_list *changed_submodule_names) { struct strvec argv = STRVEC_INIT; struct string_list_item *name; - /* No need to check if there are no submodules configured */ - if (!submodule_from_path(r, NULL, NULL)) + /* No need to check if no submodules would be fetched */ + if (!submodule_from_path(r, NULL, NULL) && + !repo_has_absorbed_submodules(r)) return; strvec_push(&argv, "--"); /* argv[0] program name */ @@ -1257,7 +1306,7 @@ static void calculate_changed_submodule_paths(struct repository *r, collect_changed_submodules(r, changed_submodule_names, &argv); for_each_string_list_item(name, changed_submodule_names) { - struct oid_array *commits = name->util; + struct changed_submodule_data *cs_data = name->util; const struct submodule *submodule; const char *path = NULL; @@ -1270,8 +1319,8 @@ static void calculate_changed_submodule_paths(struct repository *r, if (!path) continue; - if (submodule_has_commits(r, path, commits)) { - oid_array_clear(commits); + if (submodule_has_commits(r, path, null_oid(), &cs_data->new_commits)) { + changed_submodule_data_clear(cs_data); *name->string = '\0'; } } @@ -1308,12 +1357,21 @@ int submodule_touches_in_range(struct repository *r, strvec_clear(&args); - free_submodules_oids(&subs); + free_submodules_data(&subs); return ret; } struct submodule_parallel_fetch { - int count; + /* + * The index of the last index entry processed by + * get_fetch_task_from_index(). + */ + int index_count; + /* + * The index of the last string_list entry processed by + * get_fetch_task_from_changed(). 
+ */ + int changed_count; struct strvec args; struct repository *r; const char *prefix; @@ -1322,7 +1380,16 @@ struct submodule_parallel_fetch { int quiet; int result; + /* + * Names of submodules that have new commits. Generated by + * walking the newly fetched superproject commits. + */ struct string_list changed_submodule_names; + /* + * Names of submodules that have already been processed. Lets us + * avoid fetching the same submodule more than once. + */ + struct string_list seen_submodule_names; /* Pending fetches by OIDs */ struct fetch_task **oid_fetch_tasks; @@ -1333,6 +1400,7 @@ struct submodule_parallel_fetch { #define SPF_INIT { \ .args = STRVEC_INIT, \ .changed_submodule_names = STRING_LIST_INIT_DUP, \ + .seen_submodule_names = STRING_LIST_INIT_DUP, \ .submodules_with_errors = STRBUF_INIT, \ } @@ -1369,6 +1437,8 @@ struct fetch_task { struct repository *repo; const struct submodule *sub; unsigned free_sub : 1; /* Do we need to free the submodule? */ + const char *default_argv; /* The default fetch mode. */ + struct strvec git_args; /* Args for the child git process. */ struct oid_array *commits; /* Ensure these commits are fetched */ }; @@ -1394,31 +1464,6 @@ static const struct submodule *get_non_gitmodules_submodule(const char *path) return (const struct submodule *) ret; } -static struct fetch_task *fetch_task_create(struct repository *r, - const char *path) -{ - struct fetch_task *task = xmalloc(sizeof(*task)); - memset(task, 0, sizeof(*task)); - - task->sub = submodule_from_path(r, null_oid(), path); - if (!task->sub) { - /* - * No entry in .gitmodules? Technically not a submodule, - * but historically we supported repositories that happen to be - * in-place where a gitlink is. Keep supporting them. 
- */ - task->sub = get_non_gitmodules_submodule(path); - if (!task->sub) { - free(task); - return NULL; - } - - task->free_sub = 1; - } - - return task; -} - static void fetch_task_release(struct fetch_task *p) { if (p->free_sub) @@ -1429,14 +1474,17 @@ static void fetch_task_release(struct fetch_task *p) if (p->repo) repo_clear(p->repo); FREE_AND_NULL(p->repo); + + strvec_clear(&p->git_args); } static struct repository *get_submodule_repo_for(struct repository *r, - const char *path) + const char *path, + const struct object_id *treeish_name) { struct repository *ret = xmalloc(sizeof(*ret)); - if (repo_submodule_init(ret, r, path, null_oid())) { + if (repo_submodule_init(ret, r, path, treeish_name)) { free(ret); return NULL; } @@ -1444,67 +1492,83 @@ static struct repository *get_submodule_repo_for(struct repository *r, return ret; } -static int get_next_submodule(struct child_process *cp, - struct strbuf *err, void *data, void **task_cb) +static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf, + const char *path, + const struct object_id *treeish_name) { - struct submodule_parallel_fetch *spf = data; + struct fetch_task *task = xmalloc(sizeof(*task)); + memset(task, 0, sizeof(*task)); + + task->sub = submodule_from_path(spf->r, treeish_name, path); + + if (!task->sub) { + /* + * No entry in .gitmodules? Technically not a submodule, + * but historically we supported repositories that happen to be + * in-place where a gitlink is. Keep supporting them. 
+ */ + task->sub = get_non_gitmodules_submodule(path); + if (!task->sub) + goto cleanup; - for (; spf->count < spf->r->index->cache_nr; spf->count++) { - const struct cache_entry *ce = spf->r->index->cache[spf->count]; - const char *default_argv; + task->free_sub = 1; + } + + if (string_list_lookup(&spf->seen_submodule_names, task->sub->name)) + goto cleanup; + + switch (get_fetch_recurse_config(task->sub, spf)) + { + default: + case RECURSE_SUBMODULES_DEFAULT: + case RECURSE_SUBMODULES_ON_DEMAND: + if (!task->sub || + !string_list_lookup( + &spf->changed_submodule_names, + task->sub->name)) + goto cleanup; + task->default_argv = "on-demand"; + break; + case RECURSE_SUBMODULES_ON: + task->default_argv = "yes"; + break; + case RECURSE_SUBMODULES_OFF: + goto cleanup; + } + + task->repo = get_submodule_repo_for(spf->r, path, treeish_name); + + return task; + + cleanup: + fetch_task_release(task); + free(task); + return NULL; +} + +static struct fetch_task * +get_fetch_task_from_index(struct submodule_parallel_fetch *spf, + struct strbuf *err) +{ + for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) { + const struct cache_entry *ce = + spf->r->index->cache[spf->index_count]; struct fetch_task *task; if (!S_ISGITLINK(ce->ce_mode)) continue; - task = fetch_task_create(spf->r, ce->name); + task = fetch_task_create(spf, ce->name, null_oid()); if (!task) continue; - switch (get_fetch_recurse_config(task->sub, spf)) - { - default: - case RECURSE_SUBMODULES_DEFAULT: - case RECURSE_SUBMODULES_ON_DEMAND: - if (!task->sub || - !string_list_lookup( - &spf->changed_submodule_names, - task->sub->name)) - continue; - default_argv = "on-demand"; - break; - case RECURSE_SUBMODULES_ON: - default_argv = "yes"; - break; - case RECURSE_SUBMODULES_OFF: - continue; - } - - task->repo = get_submodule_repo_for(spf->r, task->sub->path); if (task->repo) { - struct strbuf submodule_prefix = STRBUF_INIT; - child_process_init(cp); - cp->dir = task->repo->gitdir; - 
prepare_submodule_repo_env_in_gitdir(&cp->env_array); - cp->git_cmd = 1; if (!spf->quiet) strbuf_addf(err, _("Fetching submodule %s%s\n"), spf->prefix, ce->name); - strvec_init(&cp->args); - strvec_pushv(&cp->args, spf->args.v); - strvec_push(&cp->args, default_argv); - strvec_push(&cp->args, "--submodule-prefix"); - - strbuf_addf(&submodule_prefix, "%s%s/", - spf->prefix, - task->sub->path); - strvec_push(&cp->args, submodule_prefix.buf); - - spf->count++; - *task_cb = task; - strbuf_release(&submodule_prefix); - return 1; + spf->index_count++; + return task; } else { struct strbuf empty_submodule_path = STRBUF_INIT; @@ -1528,6 +1592,111 @@ static int get_next_submodule(struct child_process *cp, strbuf_release(&empty_submodule_path); } } + return NULL; +} + +static struct fetch_task * +get_fetch_task_from_changed(struct submodule_parallel_fetch *spf, + struct strbuf *err) +{ + for (; spf->changed_count < spf->changed_submodule_names.nr; + spf->changed_count++) { + struct string_list_item item = + spf->changed_submodule_names.items[spf->changed_count]; + struct changed_submodule_data *cs_data = item.util; + struct fetch_task *task; + + if (!is_tree_submodule_active(spf->r, cs_data->super_oid,cs_data->path)) + continue; + + task = fetch_task_create(spf, cs_data->path, + cs_data->super_oid); + if (!task) + continue; + + if (!task->repo) { + strbuf_addf(err, _("Could not access submodule '%s' at commit %s\n"), + cs_data->path, + find_unique_abbrev(cs_data->super_oid, DEFAULT_ABBREV)); + + fetch_task_release(task); + free(task); + continue; + } + + if (!spf->quiet) + strbuf_addf(err, + _("Fetching submodule %s%s at commit %s\n"), + spf->prefix, task->sub->path, + find_unique_abbrev(cs_data->super_oid, + DEFAULT_ABBREV)); + + spf->changed_count++; + /* + * NEEDSWORK: Submodules set/unset a value for + * core.worktree when they are populated/unpopulated by + * "git checkout" (and similar commands, see + * submodule_move_head() and + * connect_work_tree_and_git_dir()), 
but if the + * submodule is unpopulated in another way (e.g. "git + * rm", "rm -r"), core.worktree will still be set even + * though the directory doesn't exist, and the child + * process will crash while trying to chdir into the + * nonexistent directory. + * + * In this case, we know that the submodule has no + * working tree, so we can work around this by + * setting "--work-tree=." (--bare does not work because + * worktree settings take precedence over bare-ness). + * However, this is not necessarily true in other cases, + * so a generalized solution is still necessary. + * + * Possible solutions: + * - teach "git [add|rm]" to unset core.worktree and + * discourage users from removing submodules without + * using a Git command. + * - teach submodule child processes to ignore stale + * core.worktree values. + */ + strvec_push(&task->git_args, "--work-tree=."); + return task; + } + return NULL; +} + +static int get_next_submodule(struct child_process *cp, struct strbuf *err, + void *data, void **task_cb) +{ + struct submodule_parallel_fetch *spf = data; + struct fetch_task *task = + get_fetch_task_from_index(spf, err); + if (!task) + task = get_fetch_task_from_changed(spf, err); + + if (task) { + struct strbuf submodule_prefix = STRBUF_INIT; + + child_process_init(cp); + cp->dir = task->repo->gitdir; + prepare_submodule_repo_env_in_gitdir(&cp->env_array); + cp->git_cmd = 1; + strvec_init(&cp->args); + if (task->git_args.nr) + strvec_pushv(&cp->args, task->git_args.v); + strvec_pushv(&cp->args, spf->args.v); + strvec_push(&cp->args, task->default_argv); + strvec_push(&cp->args, "--submodule-prefix"); + + strbuf_addf(&submodule_prefix, "%s%s/", + spf->prefix, + task->sub->path); + strvec_push(&cp->args, submodule_prefix.buf); + *task_cb = task; + + strbuf_release(&submodule_prefix); + string_list_insert(&spf->seen_submodule_names, task->sub->name); + return 1; + } if (spf->oid_fetch_tasks_nr) { struct fetch_task *task = @@ -1590,7 +1759,7 @@ static int 
fetch_finish(int retvalue, struct strbuf *err, struct fetch_task *task = task_cb; struct string_list_item *it; - struct oid_array *commits; + struct changed_submodule_data *cs_data; if (!task || !task->sub) BUG("callback cookie bogus"); @@ -1618,14 +1787,14 @@ static int fetch_finish(int retvalue, struct strbuf *err, /* Could be an unchanged submodule, not contained in the list */ goto out; - commits = it->util; - oid_array_filter(commits, + cs_data = it->util; + oid_array_filter(&cs_data->new_commits, commit_missing_in_sub, task->repo); /* Are there commits we want, but do not exist? */ - if (commits->nr) { - task->commits = commits; + if (cs_data->new_commits.nr) { + task->commits = &cs_data->new_commits; ALLOC_GROW(spf->oid_fetch_tasks, spf->oid_fetch_tasks_nr + 1, spf->oid_fetch_tasks_alloc); @@ -1640,11 +1809,11 @@ out: return 0; } -int fetch_populated_submodules(struct repository *r, - const struct strvec *options, - const char *prefix, int command_line_option, - int default_option, - int quiet, int max_parallel_jobs) +int fetch_submodules(struct repository *r, + const struct strvec *options, + const char *prefix, int command_line_option, + int default_option, + int quiet, int max_parallel_jobs) { int i; struct submodule_parallel_fetch spf = SPF_INIT; @@ -1683,7 +1852,7 @@ int fetch_populated_submodules(struct repository *r, strvec_clear(&spf.args); out: - free_submodules_oids(&spf.changed_submodule_names); + free_submodules_data(&spf.changed_submodule_names); return spf.result; } |