summaryrefslogtreecommitdiff
path: root/submodule.c
diff options
context:
space:
mode:
Diffstat (limited to 'submodule.c')
-rw-r--r--submodule.c458
1 files changed, 310 insertions, 148 deletions
diff --git a/submodule.c b/submodule.c
index 5ace18a7d9..86c8f0f89d 100644
--- a/submodule.c
+++ b/submodule.c
@@ -22,6 +22,7 @@
#include "parse-options.h"
#include "object-store.h"
#include "commit-reach.h"
+#include "shallow.h"
static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF;
static int initialized_fetch_ref_tips;
@@ -167,26 +168,6 @@ void stage_updated_gitmodules(struct index_state *istate)
static struct string_list added_submodule_odb_paths = STRING_LIST_INIT_NODUP;
-/* TODO: remove this function, use repo_submodule_init instead. */
-int add_submodule_odb(const char *path)
-{
- struct strbuf objects_directory = STRBUF_INIT;
- int ret = 0;
-
- ret = strbuf_git_path_submodule(&objects_directory, path, "objects/");
- if (ret)
- goto done;
- if (!is_directory(objects_directory.buf)) {
- ret = -1;
- goto done;
- }
- string_list_insert(&added_submodule_odb_paths,
- strbuf_detach(&objects_directory, NULL));
-done:
- strbuf_release(&objects_directory);
- return ret;
-}
-
void add_submodule_odb_by_path(const char *path)
{
string_list_insert(&added_submodule_odb_paths, xstrdup(path));
@@ -782,19 +763,6 @@ const struct submodule *submodule_from_ce(const struct cache_entry *ce)
return submodule_from_path(the_repository, null_oid(), ce->name);
}
-static struct oid_array *submodule_commits(struct string_list *submodules,
- const char *name)
-{
- struct string_list_item *item;
-
- item = string_list_insert(submodules, name);
- if (item->util)
- return (struct oid_array *) item->util;
-
- /* NEEDSWORK: should we have oid_array_init()? */
- item->util = xcalloc(1, sizeof(struct oid_array));
- return (struct oid_array *) item->util;
-}
struct collect_changed_submodules_cb_data {
struct repository *repo;
@@ -819,6 +787,52 @@ static const char *default_name_or_path(const char *path_or_name)
return path_or_name;
}
+/*
+ * Holds relevant information for a changed submodule. Used as the .util
+ * member of the changed submodule name string_list_item.
+ *
+ * (super_oid, path) allows the submodule config to be read from _some_
+ * .gitmodules file. We store this information the first time we find a
+ * superproject commit that points to the submodule, but this is
+ * arbitrary - we can choose any (super_oid, path) that matches the
+ * submodule's name.
+ *
+ * NEEDSWORK: Storing an arbitrary commit is undesirable because we can't
+ * guarantee that we're reading the commit that the user would expect. A better
+ * scheme would be to just fetch a submodule by its name. This requires two
+ * steps:
+ * - Create a function that behaves like repo_submodule_init(), but accepts a
+ * submodule name instead of treeish_name and path. This should be easy
+ * because repo_submodule_init() internally uses the submodule's name.
+ *
+ * - Replace most instances of 'struct submodule' (which is the .gitmodules
+ * config) with just the submodule name. This is OK because we expect
+ * submodule settings to be stored in .git/config (via "git submodule init"),
+ * not .gitmodules. This also lets us delete get_non_gitmodules_submodule(),
+ * which constructs a bogus 'struct submodule' for the sake of giving a
+ * placeholder name to a gitlink.
+ */
+struct changed_submodule_data {
+ /*
+ * The first superproject commit in the rev walk that points to
+ * the submodule.
+ */
+ const struct object_id *super_oid;
+ /*
+ * Path to the submodule in the superproject commit referenced
+ * by 'super_oid'.
+ */
+ char *path;
+ /* The submodule commits that have changed in the rev walk. */
+ struct oid_array new_commits;
+};
+
+static void changed_submodule_data_clear(struct changed_submodule_data *cs_data)
+{
+ oid_array_clear(&cs_data->new_commits);
+ free(cs_data->path);
+}
+
static void collect_changed_submodules_cb(struct diff_queue_struct *q,
struct diff_options *options,
void *data)
@@ -830,9 +844,10 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q,
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
- struct oid_array *commits;
const struct submodule *submodule;
const char *name;
+ struct string_list_item *item;
+ struct changed_submodule_data *cs_data;
if (!S_ISGITLINK(p->two->mode))
continue;
@@ -859,8 +874,16 @@ static void collect_changed_submodules_cb(struct diff_queue_struct *q,
if (!name)
continue;
- commits = submodule_commits(changed, name);
- oid_array_append(commits, &p->two->oid);
+ item = string_list_insert(changed, name);
+ if (item->util)
+ cs_data = item->util;
+ else {
+ item->util = xcalloc(1, sizeof(struct changed_submodule_data));
+ cs_data = item->util;
+ cs_data->super_oid = commit_oid;
+ cs_data->path = xstrdup(p->two->path);
+ }
+ oid_array_append(&cs_data->new_commits, &p->two->oid);
}
}
@@ -907,11 +930,12 @@ static void collect_changed_submodules(struct repository *r,
reset_revision_walk();
}
-static void free_submodules_oids(struct string_list *submodules)
+static void free_submodules_data(struct string_list *submodules)
{
struct string_list_item *item;
for_each_string_list_item(item, submodules)
- oid_array_clear((struct oid_array *) item->util);
+ changed_submodule_data_clear(item->util);
+
string_list_clear(submodules, 1);
}
@@ -932,6 +956,7 @@ struct has_commit_data {
struct repository *repo;
int result;
const char *path;
+ const struct object_id *super_oid;
};
static int check_has_commit(const struct object_id *oid, void *data)
@@ -940,9 +965,10 @@ static int check_has_commit(const struct object_id *oid, void *data)
struct repository subrepo;
enum object_type type;
- if (repo_submodule_init(&subrepo, cb->repo, cb->path, null_oid())) {
+ if (repo_submodule_init(&subrepo, cb->repo, cb->path, cb->super_oid)) {
cb->result = 0;
- goto cleanup;
+ /* subrepo failed to init, so don't clean it up. */
+ return 0;
}
type = oid_object_info(&subrepo, oid, NULL);
@@ -968,21 +994,15 @@ cleanup:
static int submodule_has_commits(struct repository *r,
const char *path,
+ const struct object_id *super_oid,
struct oid_array *commits)
{
- struct has_commit_data has_commit = { r, 1, path };
-
- /*
- * Perform a cheap, but incorrect check for the existence of 'commits'.
- * This is done by adding the submodule's object store to the in-core
- * object store, and then querying for each commit's existence. If we
- * do not have the commit object anywhere, there is no chance we have
- * it in the object store of the correct submodule and have it
- * reachable from a ref, so we can fail early without spawning rev-list
- * which is expensive.
- */
- if (add_submodule_odb(path))
- return 0;
+ struct has_commit_data has_commit = {
+ .repo = r,
+ .result = 1,
+ .path = path,
+ .super_oid = super_oid
+ };
oid_array_for_each_unique(commits, check_has_commit, &has_commit);
@@ -1017,7 +1037,7 @@ static int submodule_needs_pushing(struct repository *r,
const char *path,
struct oid_array *commits)
{
- if (!submodule_has_commits(r, path, commits))
+ if (!submodule_has_commits(r, path, null_oid(), commits))
/*
* NOTE: We do consider it safe to return "no" here. The
* correct answer would be "We do not know" instead of
@@ -1077,7 +1097,7 @@ int find_unpushed_submodules(struct repository *r,
collect_changed_submodules(r, &submodules, &argv);
for_each_string_list_item(name, &submodules) {
- struct oid_array *commits = name->util;
+ struct changed_submodule_data *cs_data = name->util;
const struct submodule *submodule;
const char *path = NULL;
@@ -1090,11 +1110,11 @@ int find_unpushed_submodules(struct repository *r,
if (!path)
continue;
- if (submodule_needs_pushing(r, path, commits))
+ if (submodule_needs_pushing(r, path, &cs_data->new_commits))
string_list_insert(needs_pushing, path);
}
- free_submodules_oids(&submodules);
+ free_submodules_data(&submodules);
strvec_clear(&argv);
return needs_pushing->nr;
@@ -1240,14 +1260,36 @@ void check_for_new_submodule_commits(struct object_id *oid)
oid_array_append(&ref_tips_after_fetch, oid);
}
+/*
+ * Returns 1 if there is at least one submodule gitdir in
+ * $GIT_DIR/modules and 0 otherwise. This follows
+ * submodule_name_to_gitdir(), which looks for submodules in
+ * $GIT_DIR/modules, not $GIT_COMMON_DIR.
+ *
+ * A submodule can be moved to $GIT_DIR/modules manually by running "git
+ * submodule absorbgitdirs", or it may be initialized there by "git
+ * submodule update".
+ */
+static int repo_has_absorbed_submodules(struct repository *r)
+{
+ int ret;
+ struct strbuf buf = STRBUF_INIT;
+
+ strbuf_repo_git_path(&buf, r, "modules/");
+ ret = file_exists(buf.buf) && !is_empty_dir(buf.buf);
+ strbuf_release(&buf);
+ return ret;
+}
+
static void calculate_changed_submodule_paths(struct repository *r,
struct string_list *changed_submodule_names)
{
struct strvec argv = STRVEC_INIT;
struct string_list_item *name;
- /* No need to check if there are no submodules configured */
- if (!submodule_from_path(r, NULL, NULL))
+ /* No need to check if no submodules would be fetched */
+ if (!submodule_from_path(r, NULL, NULL) &&
+ !repo_has_absorbed_submodules(r))
return;
strvec_push(&argv, "--"); /* argv[0] program name */
@@ -1264,7 +1306,7 @@ static void calculate_changed_submodule_paths(struct repository *r,
collect_changed_submodules(r, changed_submodule_names, &argv);
for_each_string_list_item(name, changed_submodule_names) {
- struct oid_array *commits = name->util;
+ struct changed_submodule_data *cs_data = name->util;
const struct submodule *submodule;
const char *path = NULL;
@@ -1277,8 +1319,8 @@ static void calculate_changed_submodule_paths(struct repository *r,
if (!path)
continue;
- if (submodule_has_commits(r, path, commits)) {
- oid_array_clear(commits);
+ if (submodule_has_commits(r, path, null_oid(), &cs_data->new_commits)) {
+ changed_submodule_data_clear(cs_data);
*name->string = '\0';
}
}
@@ -1315,12 +1357,21 @@ int submodule_touches_in_range(struct repository *r,
strvec_clear(&args);
- free_submodules_oids(&subs);
+ free_submodules_data(&subs);
return ret;
}
struct submodule_parallel_fetch {
- int count;
+ /*
+ * The index of the last index entry processed by
+ * get_fetch_task_from_index().
+ */
+ int index_count;
+ /*
+ * The index of the last string_list entry processed by
+ * get_fetch_task_from_changed().
+ */
+ int changed_count;
struct strvec args;
struct repository *r;
const char *prefix;
@@ -1329,7 +1380,16 @@ struct submodule_parallel_fetch {
int quiet;
int result;
+ /*
+ * Names of submodules that have new commits. Generated by
+ * walking the newly fetched superproject commits.
+ */
struct string_list changed_submodule_names;
+ /*
+ * Names of submodules that have already been processed. Lets us
+ * avoid fetching the same submodule more than once.
+ */
+ struct string_list seen_submodule_names;
/* Pending fetches by OIDs */
struct fetch_task **oid_fetch_tasks;
@@ -1340,6 +1400,7 @@ struct submodule_parallel_fetch {
#define SPF_INIT { \
.args = STRVEC_INIT, \
.changed_submodule_names = STRING_LIST_INIT_DUP, \
+ .seen_submodule_names = STRING_LIST_INIT_DUP, \
.submodules_with_errors = STRBUF_INIT, \
}
@@ -1376,6 +1437,8 @@ struct fetch_task {
struct repository *repo;
const struct submodule *sub;
unsigned free_sub : 1; /* Do we need to free the submodule? */
+ const char *default_argv; /* The default fetch mode. */
+ struct strvec git_args; /* Args for the child git process. */
struct oid_array *commits; /* Ensure these commits are fetched */
};
@@ -1401,31 +1464,6 @@ static const struct submodule *get_non_gitmodules_submodule(const char *path)
return (const struct submodule *) ret;
}
-static struct fetch_task *fetch_task_create(struct repository *r,
- const char *path)
-{
- struct fetch_task *task = xmalloc(sizeof(*task));
- memset(task, 0, sizeof(*task));
-
- task->sub = submodule_from_path(r, null_oid(), path);
- if (!task->sub) {
- /*
- * No entry in .gitmodules? Technically not a submodule,
- * but historically we supported repositories that happen to be
- * in-place where a gitlink is. Keep supporting them.
- */
- task->sub = get_non_gitmodules_submodule(path);
- if (!task->sub) {
- free(task);
- return NULL;
- }
-
- task->free_sub = 1;
- }
-
- return task;
-}
-
static void fetch_task_release(struct fetch_task *p)
{
if (p->free_sub)
@@ -1436,14 +1474,17 @@ static void fetch_task_release(struct fetch_task *p)
if (p->repo)
repo_clear(p->repo);
FREE_AND_NULL(p->repo);
+
+ strvec_clear(&p->git_args);
}
static struct repository *get_submodule_repo_for(struct repository *r,
- const char *path)
+ const char *path,
+ const struct object_id *treeish_name)
{
struct repository *ret = xmalloc(sizeof(*ret));
- if (repo_submodule_init(ret, r, path, null_oid())) {
+ if (repo_submodule_init(ret, r, path, treeish_name)) {
free(ret);
return NULL;
}
@@ -1451,67 +1492,83 @@ static struct repository *get_submodule_repo_for(struct repository *r,
return ret;
}
-static int get_next_submodule(struct child_process *cp,
- struct strbuf *err, void *data, void **task_cb)
+static struct fetch_task *fetch_task_create(struct submodule_parallel_fetch *spf,
+ const char *path,
+ const struct object_id *treeish_name)
{
- struct submodule_parallel_fetch *spf = data;
+ struct fetch_task *task = xmalloc(sizeof(*task));
+ memset(task, 0, sizeof(*task));
+
+ task->sub = submodule_from_path(spf->r, treeish_name, path);
- for (; spf->count < spf->r->index->cache_nr; spf->count++) {
- const struct cache_entry *ce = spf->r->index->cache[spf->count];
- const char *default_argv;
+ if (!task->sub) {
+ /*
+ * No entry in .gitmodules? Technically not a submodule,
+ * but historically we supported repositories that happen to be
+ * in-place where a gitlink is. Keep supporting them.
+ */
+ task->sub = get_non_gitmodules_submodule(path);
+ if (!task->sub)
+ goto cleanup;
+
+ task->free_sub = 1;
+ }
+
+ if (string_list_lookup(&spf->seen_submodule_names, task->sub->name))
+ goto cleanup;
+
+ switch (get_fetch_recurse_config(task->sub, spf))
+ {
+ default:
+ case RECURSE_SUBMODULES_DEFAULT:
+ case RECURSE_SUBMODULES_ON_DEMAND:
+ if (!task->sub ||
+ !string_list_lookup(
+ &spf->changed_submodule_names,
+ task->sub->name))
+ goto cleanup;
+ task->default_argv = "on-demand";
+ break;
+ case RECURSE_SUBMODULES_ON:
+ task->default_argv = "yes";
+ break;
+ case RECURSE_SUBMODULES_OFF:
+ goto cleanup;
+ }
+
+ task->repo = get_submodule_repo_for(spf->r, path, treeish_name);
+
+ return task;
+
+ cleanup:
+ fetch_task_release(task);
+ free(task);
+ return NULL;
+}
+
+static struct fetch_task *
+get_fetch_task_from_index(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
+{
+ for (; spf->index_count < spf->r->index->cache_nr; spf->index_count++) {
+ const struct cache_entry *ce =
+ spf->r->index->cache[spf->index_count];
struct fetch_task *task;
if (!S_ISGITLINK(ce->ce_mode))
continue;
- task = fetch_task_create(spf->r, ce->name);
+ task = fetch_task_create(spf, ce->name, null_oid());
if (!task)
continue;
- switch (get_fetch_recurse_config(task->sub, spf))
- {
- default:
- case RECURSE_SUBMODULES_DEFAULT:
- case RECURSE_SUBMODULES_ON_DEMAND:
- if (!task->sub ||
- !string_list_lookup(
- &spf->changed_submodule_names,
- task->sub->name))
- continue;
- default_argv = "on-demand";
- break;
- case RECURSE_SUBMODULES_ON:
- default_argv = "yes";
- break;
- case RECURSE_SUBMODULES_OFF:
- continue;
- }
-
- task->repo = get_submodule_repo_for(spf->r, task->sub->path);
if (task->repo) {
- struct strbuf submodule_prefix = STRBUF_INIT;
- child_process_init(cp);
- cp->dir = task->repo->gitdir;
- prepare_submodule_repo_env_in_gitdir(&cp->env_array);
- cp->git_cmd = 1;
if (!spf->quiet)
strbuf_addf(err, _("Fetching submodule %s%s\n"),
spf->prefix, ce->name);
- strvec_init(&cp->args);
- strvec_pushv(&cp->args, spf->args.v);
- strvec_push(&cp->args, default_argv);
- strvec_push(&cp->args, "--submodule-prefix");
- strbuf_addf(&submodule_prefix, "%s%s/",
- spf->prefix,
- task->sub->path);
- strvec_push(&cp->args, submodule_prefix.buf);
-
- spf->count++;
- *task_cb = task;
-
- strbuf_release(&submodule_prefix);
- return 1;
+ spf->index_count++;
+ return task;
} else {
struct strbuf empty_submodule_path = STRBUF_INIT;
@@ -1535,6 +1592,111 @@ static int get_next_submodule(struct child_process *cp,
strbuf_release(&empty_submodule_path);
}
}
+ return NULL;
+}
+
+static struct fetch_task *
+get_fetch_task_from_changed(struct submodule_parallel_fetch *spf,
+ struct strbuf *err)
+{
+ for (; spf->changed_count < spf->changed_submodule_names.nr;
+ spf->changed_count++) {
+ struct string_list_item item =
+ spf->changed_submodule_names.items[spf->changed_count];
+ struct changed_submodule_data *cs_data = item.util;
+ struct fetch_task *task;
+
+ if (!is_tree_submodule_active(spf->r, cs_data->super_oid,cs_data->path))
+ continue;
+
+ task = fetch_task_create(spf, cs_data->path,
+ cs_data->super_oid);
+ if (!task)
+ continue;
+
+ if (!task->repo) {
+ strbuf_addf(err, _("Could not access submodule '%s' at commit %s\n"),
+ cs_data->path,
+ find_unique_abbrev(cs_data->super_oid, DEFAULT_ABBREV));
+
+ fetch_task_release(task);
+ free(task);
+ continue;
+ }
+
+ if (!spf->quiet)
+ strbuf_addf(err,
+ _("Fetching submodule %s%s at commit %s\n"),
+ spf->prefix, task->sub->path,
+ find_unique_abbrev(cs_data->super_oid,
+ DEFAULT_ABBREV));
+
+ spf->changed_count++;
+ /*
+ * NEEDSWORK: Submodules set/unset a value for
+ * core.worktree when they are populated/unpopulated by
+ * "git checkout" (and similar commands, see
+ * submodule_move_head() and
+ * connect_work_tree_and_git_dir()), but if the
+ * submodule is unpopulated in another way (e.g. "git
+ * rm", "rm -r"), core.worktree will still be set even
+ * though the directory doesn't exist, and the child
+ * process will crash while trying to chdir into the
+ * nonexistent directory.
+ *
+ * In this case, we know that the submodule has no
+ * working tree, so we can work around this by
+ * setting "--work-tree=." (--bare does not work because
+ * worktree settings take precedence over bare-ness).
+ * However, this is not necessarily true in other cases,
+ * so a generalized solution is still necessary.
+ *
+ * Possible solutions:
+ * - teach "git [add|rm]" to unset core.worktree and
+ * discourage users from removing submodules without
+ * using a Git command.
+ * - teach submodule child processes to ignore stale
+ * core.worktree values.
+ */
+ strvec_push(&task->git_args, "--work-tree=.");
+ return task;
+ }
+ return NULL;
+}
+
+static int get_next_submodule(struct child_process *cp, struct strbuf *err,
+ void *data, void **task_cb)
+{
+ struct submodule_parallel_fetch *spf = data;
+ struct fetch_task *task =
+ get_fetch_task_from_index(spf, err);
+ if (!task)
+ task = get_fetch_task_from_changed(spf, err);
+
+ if (task) {
+ struct strbuf submodule_prefix = STRBUF_INIT;
+
+ child_process_init(cp);
+ cp->dir = task->repo->gitdir;
+ prepare_submodule_repo_env_in_gitdir(&cp->env_array);
+ cp->git_cmd = 1;
+ strvec_init(&cp->args);
+ if (task->git_args.nr)
+ strvec_pushv(&cp->args, task->git_args.v);
+ strvec_pushv(&cp->args, spf->args.v);
+ strvec_push(&cp->args, task->default_argv);
+ strvec_push(&cp->args, "--submodule-prefix");
+
+ strbuf_addf(&submodule_prefix, "%s%s/",
+ spf->prefix,
+ task->sub->path);
+ strvec_push(&cp->args, submodule_prefix.buf);
+ *task_cb = task;
+
+ strbuf_release(&submodule_prefix);
+ string_list_insert(&spf->seen_submodule_names, task->sub->name);
+ return 1;
+ }
if (spf->oid_fetch_tasks_nr) {
struct fetch_task *task =
@@ -1597,7 +1759,7 @@ static int fetch_finish(int retvalue, struct strbuf *err,
struct fetch_task *task = task_cb;
struct string_list_item *it;
- struct oid_array *commits;
+ struct changed_submodule_data *cs_data;
if (!task || !task->sub)
BUG("callback cookie bogus");
@@ -1625,14 +1787,14 @@ static int fetch_finish(int retvalue, struct strbuf *err,
/* Could be an unchanged submodule, not contained in the list */
goto out;
- commits = it->util;
- oid_array_filter(commits,
+ cs_data = it->util;
+ oid_array_filter(&cs_data->new_commits,
commit_missing_in_sub,
task->repo);
/* Are there commits we want, but do not exist? */
- if (commits->nr) {
- task->commits = commits;
+ if (cs_data->new_commits.nr) {
+ task->commits = &cs_data->new_commits;
ALLOC_GROW(spf->oid_fetch_tasks,
spf->oid_fetch_tasks_nr + 1,
spf->oid_fetch_tasks_alloc);
@@ -1647,11 +1809,11 @@ out:
return 0;
}
-int fetch_populated_submodules(struct repository *r,
- const struct strvec *options,
- const char *prefix, int command_line_option,
- int default_option,
- int quiet, int max_parallel_jobs)
+int fetch_submodules(struct repository *r,
+ const struct strvec *options,
+ const char *prefix, int command_line_option,
+ int default_option,
+ int quiet, int max_parallel_jobs)
{
int i;
struct submodule_parallel_fetch spf = SPF_INIT;
@@ -1690,7 +1852,7 @@ int fetch_populated_submodules(struct repository *r,
strvec_clear(&spf.args);
out:
- free_submodules_oids(&spf.changed_submodule_names);
+ free_submodules_data(&spf.changed_submodule_names);
return spf.result;
}