diff options
Diffstat (limited to 'merge-ort.c')
-rw-r--r-- | merge-ort.c | 658 |
1 files changed, 540 insertions, 118 deletions
diff --git a/merge-ort.c b/merge-ort.c index 8cfa0ccd3c..e5456f4722 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -32,6 +32,7 @@ #include "promisor-remote.h" #include "revision.h" #include "strmap.h" +#include "submodule-config.h" #include "submodule.h" #include "tree.h" #include "unpack-trees.h" @@ -62,6 +63,53 @@ struct traversal_callback_data { struct name_entry names[3]; }; +struct deferred_traversal_data { + /* + * possible_trivial_merges: directories to be explored only when needed + * + * possible_trivial_merges is a map of directory names to + * dir_rename_mask. When we detect that a directory is unchanged on + * one side, we can sometimes resolve the directory without recursing + * into it. Renames are the only things that can prevent such an + * optimization. However, for rename sources: + * - If no parent directory needed directory rename detection, then + * no path under such a directory can be a relevant_source. + * and for rename destinations: + * - If no cached rename has a target path under the directory AND + * - If there are no unpaired relevant_sources elsewhere in the + * repository + * then we don't need any path under this directory for a rename + * destination. The only way to know the last item above is to defer + * handling such directories until the end of collect_merge_info(), + * in handle_deferred_entries(). + * + * For each we store dir_rename_mask, since that's the only bit of + * information we need, other than the path, to resume the recursive + * traversal. + */ + struct strintmap possible_trivial_merges; + + /* + * trivial_merges_okay: if trivial directory merges are okay + * + * See possible_trivial_merges above. The "no unpaired + * relevant_sources elsewhere in the repository" is a single boolean + * per merge side, which we store here. Note that while 0 means no, + * 1 only means "maybe" rather than "yes"; we optimistically set it + * to 1 initially and only clear when we determine it is unsafe to + * do trivial directory merges. + */ + unsigned trivial_merges_okay; + + /* + * target_dirs: ancestor directories of rename targets + * + * target_dirs contains all directory names that are an ancestor of + * any rename destination. + */ + struct strset target_dirs; +}; + struct rename_info { /* * All variables that are arrays of size 3 correspond to data tracked @@ -119,6 +167,8 @@ struct rename_info { */ struct strintmap relevant_sources[3]; + struct deferred_traversal_data deferred[3]; + /* * dir_rename_mask: * 0: optimization removing unmodified potential rename source okay @@ -164,6 +214,7 @@ struct rename_info { * MERGE_SIDE2: cached data from side2 can be reused * MERGE_SIDE1: cached data from side1 can be reused * 0: no cached data can be reused + * -1: See redo_after_renames; both sides can be reused. */ int cached_pairs_valid_side; @@ -210,6 +261,28 @@ struct rename_info { struct strset cached_irrelevant[3]; /* + * redo_after_renames: optimization flag for "restarting" the merge + * + * Sometimes it pays to detect renames, cache them, and then + * restart the merge operation from the beginning. The reason for + * this is that when we know where all the renames are, we know + * whether a certain directory has any paths under it affected -- + * and if a directory is not affected then it permits us to do + * trivial tree merging in more cases. Doing trivial tree merging + * prevents the need to run process_entry() on every path + * underneath trees that can be trivially merged, and + * process_entry() is more expensive than collect_merge_info() -- + * plus, the second collect_merge_info() will be much faster since + * it doesn't have to recurse into the relevant trees. + * + * Values for this flag: + * 0 = don't bother, not worth it (or conditions not yet checked) + * 1 = conditions for optimization met, optimization worthwhile + * 2 = we already did it (don't restart merge yet again) + */ + unsigned redo_after_renames; + + /* * needed_limit: value needed for inexact rename detection to run * * If the current rename limit wasn't high enough for inexact @@ -231,8 +304,6 @@ struct merge_options_internal { * * these keys serve to intern all the path strings, which allows * us to do pointer comparison on directory names instead of * strcmp; we just have to be careful to use the interned strings. - * (Technically paths_to_free may track some strings that were - * removed from froms paths.) * * The values of paths: * * either a pointer to a merged_info, or a conflict_info struct @@ -268,14 +339,14 @@ struct merge_options_internal { struct strmap conflicted; /* - * paths_to_free: additional list of strings to free + * pool: memory pool for fast allocation/deallocation * - * If keys are removed from "paths", they are added to paths_to_free - * to ensure they are later freed. We avoid free'ing immediately since - * other places (e.g. conflict_info.pathnames[]) may still be - * referencing these paths. + * We allocate room for lots of filenames and auxiliary data + * structures in merge_options_internal, and it tends to all be + * freed together too. Using a memory pool for these provides a + * nice speedup. */ - struct string_list paths_to_free; + struct mem_pool pool; /* * output: special messages and conflict notices for various paths @@ -447,60 +518,47 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, { struct rename_info *renames = &opti->renames; int i; - void (*strmap_func)(struct strmap *, int) = + void (*strmap_clear_func)(struct strmap *, int) = reinitialize ? strmap_partial_clear : strmap_clear; - void (*strintmap_func)(struct strintmap *) = + void (*strintmap_clear_func)(struct strintmap *) = reinitialize ? strintmap_partial_clear : strintmap_clear; - void (*strset_func)(struct strset *) = + void (*strset_clear_func)(struct strset *) = reinitialize ? strset_partial_clear : strset_clear; - /* - * We marked opti->paths with strdup_strings = 0, so that we - * wouldn't have to make another copy of the fullpath created by - * make_traverse_path from setup_path_info(). But, now that we've - * used it and have no other references to these strings, it is time - * to deallocate them. - */ - free_strmap_strings(&opti->paths); - strmap_func(&opti->paths, 1); + strmap_clear_func(&opti->paths, 0); /* * All keys and values in opti->conflicted are a subset of those in * opti->paths. We don't want to deallocate anything twice, so we * don't free the keys and we pass 0 for free_values. */ - strmap_func(&opti->conflicted, 0); - - /* - * opti->paths_to_free is similar to opti->paths; we created it with - * strdup_strings = 0 to avoid making _another_ copy of the fullpath - * but now that we've used it and have no other references to these - * strings, it is time to deallocate them. We do so by temporarily - * setting strdup_strings to 1. - */ - opti->paths_to_free.strdup_strings = 1; - string_list_clear(&opti->paths_to_free, 0); - opti->paths_to_free.strdup_strings = 0; + strmap_clear_func(&opti->conflicted, 0); if (opti->attr_index.cache_nr) /* true iff opt->renormalize */ discard_index(&opti->attr_index); /* Free memory used by various renames maps */ for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { - strintmap_func(&renames->dirs_removed[i]); - strmap_func(&renames->dir_renames[i], 0); - strintmap_func(&renames->relevant_sources[i]); + strintmap_clear_func(&renames->dirs_removed[i]); + strmap_clear_func(&renames->dir_renames[i], 0); + strintmap_clear_func(&renames->relevant_sources[i]); if (!reinitialize) assert(renames->cached_pairs_valid_side == 0); - if (i != renames->cached_pairs_valid_side) { - strset_func(&renames->cached_target_names[i]); - strmap_func(&renames->cached_pairs[i], 1); - strset_func(&renames->cached_irrelevant[i]); + if (i != renames->cached_pairs_valid_side && + -1 != renames->cached_pairs_valid_side) { + strset_clear_func(&renames->cached_target_names[i]); + strmap_clear_func(&renames->cached_pairs[i], 1); + strset_clear_func(&renames->cached_irrelevant[i]); partial_clear_dir_rename_count(&renames->dir_rename_count[i]); if (!reinitialize) strmap_clear(&renames->dir_rename_count[i], 1); } } + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { + strintmap_clear_func(&renames->deferred[i].possible_trivial_merges); + strset_clear_func(&renames->deferred[i].target_dirs); + renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */ + } renames->cached_pairs_valid_side = 0; renames->dir_rename_mask = 0; @@ -525,11 +583,14 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, strmap_clear(&opti->output, 0); } + mem_pool_discard(&opti->pool, 0); + /* Clean out callback_data as well. */ FREE_AND_NULL(renames->callback_data); renames->callback_data_nr = renames->callback_data_alloc = 0; } +__attribute__((format (printf, 2, 3))) static int err(struct merge_options *opt, const char *err, ...) { va_list params; @@ -586,6 +647,36 @@ static void path_msg(struct merge_options *opt, strbuf_addch(sb, '\n'); } +static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool, + const char *path) +{ + /* Similar to alloc_filespec(), but allocate from pool and reuse path */ + struct diff_filespec *spec; + + spec = mem_pool_calloc(pool, 1, sizeof(*spec)); + spec->path = (char*)path; /* spec won't modify it */ + + spec->count = 1; + spec->is_binary = -1; + return spec; +} + +static struct diff_filepair *pool_diff_queue(struct mem_pool *pool, + struct diff_queue_struct *queue, + struct diff_filespec *one, + struct diff_filespec *two) +{ + /* Same code as diff_queue(), except allocate from pool */ + struct diff_filepair *dp; + + dp = mem_pool_calloc(pool, 1, sizeof(*dp)); + dp->one = one; + dp->two = two; + if (queue) + diff_q(queue, dp); + return dp; +} + /* add a string to a strbuf, but converting "/" to "_" */ static void add_flattened_path(struct strbuf *out, const char *s) { @@ -714,8 +805,9 @@ static void setup_path_info(struct merge_options *opt, assert(!df_conflict || !resolved); /* df_conflict implies !resolved */ assert(resolved == (merged_version != NULL)); - mi = xcalloc(1, resolved ? sizeof(struct merged_info) : - sizeof(struct conflict_info)); + mi = mem_pool_calloc(&opt->priv->pool, 1, + resolved ? sizeof(struct merged_info) : + sizeof(struct conflict_info)); mi->directory_name = current_dir_name; mi->basename_offset = current_dir_name_len; mi->clean = !!resolved; @@ -812,11 +904,11 @@ static void add_pair(struct merge_options *opt, return; } - one = alloc_filespec(pathname); - two = alloc_filespec(pathname); + one = pool_alloc_filespec(&opt->priv->pool, pathname); + two = pool_alloc_filespec(&opt->priv->pool, pathname); fill_filespec(is_add ? two : one, &names[names_idx].oid, 1, names[names_idx].mode); - diff_queue(&renames->pairs[side], one, two); + pool_diff_queue(&opt->priv->pool, &renames->pairs[side], one, two); } static void collect_rename_info(struct merge_options *opt, @@ -1007,7 +1099,7 @@ static int collect_merge_info_callback(int n, len = traverse_path_len(info, p->pathlen); /* +1 in both of the following lines to include the NUL byte */ - fullpath = xmalloc(len + 1); + fullpath = mem_pool_alloc(&opt->priv->pool, len + 1); make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen); /* @@ -1018,20 +1110,67 @@ static int collect_merge_info_callback(int n, if (side1_matches_mbase && side2_matches_mbase) { /* mbase, side1, & side2 all match; use mbase as resolution */ setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, - names, names+0, mbase_null, 0, + names, names+0, mbase_null, 0 /* df_conflict */, + filemask, dirmask, 1 /* resolved */); + return mask; + } + + /* + * If the sides match, and all three paths are present and are + * files, then we can take either as the resolution. We can't do + * this with trees, because there may be rename sources from the + * merge_base. + */ + if (sides_match && filemask == 0x07) { + /* use side1 (== side2) version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+1, side1_null, 0, filemask, dirmask, 1); return mask; } /* - * Gather additional information used in rename detection. + * If side1 matches mbase and all three paths are present and are + * files, then we can use side2 as the resolution. We cannot + * necessarily do so this for trees, because there may be rename + * destinations within side2. + */ + if (side1_matches_mbase && filemask == 0x07) { + /* use side2 version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+2, side2_null, 0, + filemask, dirmask, 1); + return mask; + } + + /* Similar to above but swapping sides 1 and 2 */ + if (side2_matches_mbase && filemask == 0x07) { + /* use side1 version as resolution */ + setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, + names, names+1, side1_null, 0, + filemask, dirmask, 1); + return mask; + } + + /* + * Sometimes we can tell that a source path need not be included in + * rename detection -- namely, whenever either + * side1_matches_mbase && side2_null + * or + * side2_matches_mbase && side1_null + * However, we call collect_rename_info() even in those cases, + * because exact renames are cheap and would let us remove both a + * source and destination path. We'll cull the unneeded sources + * later. */ collect_rename_info(opt, names, dirname, fullpath, filemask, dirmask, match_mask); /* - * Record information about the path so we can resolve later in - * process_entries. + * None of the special cases above matched, so we have a + * provisional conflict. (Rename detection might allow us to + * unconflict some more cases, but that comes later so all we can + * do now is record the different non-null file hashes.) */ setup_path_info(opt, &pi, dirname, info->pathlen, fullpath, names, NULL, 0, df_conflict, filemask, dirmask, 0); @@ -1046,8 +1185,36 @@ static int collect_merge_info_callback(int n, struct tree_desc t[3]; void *buf[3] = {NULL, NULL, NULL}; const char *original_dir_name; - int i, ret; + int i, ret, side; + + /* + * Check for whether we can avoid recursing due to one side + * matching the merge base. The side that does NOT match is + * the one that might have a rename destination we need. + */ + assert(!side1_matches_mbase || !side2_matches_mbase); + side = side1_matches_mbase ? MERGE_SIDE2 : + side2_matches_mbase ? MERGE_SIDE1 : MERGE_BASE; + if (filemask == 0 && (dirmask == 2 || dirmask == 4)) { + /* + * Also defer recursing into new directories; set up a + * few variables to let us do so. + */ + ci->match_mask = (7 - dirmask); + side = dirmask / 2; + } + if (renames->dir_rename_mask != 0x07 && + side != MERGE_BASE && + renames->deferred[side].trivial_merges_okay && + !strset_contains(&renames->deferred[side].target_dirs, + pi.string)) { + strintmap_set(&renames->deferred[side].possible_trivial_merges, + pi.string, renames->dir_rename_mask); + renames->dir_rename_mask = prev_dir_rename_mask; + return mask; + } + /* We need to recurse */ ci->match_mask &= filemask; newinfo = *info; newinfo.prev = info; @@ -1101,6 +1268,192 @@ static int collect_merge_info_callback(int n, return mask; } +static void resolve_trivial_directory_merge(struct conflict_info *ci, int side) +{ + VERIFY_CI(ci); + assert((side == 1 && ci->match_mask == 5) || + (side == 2 && ci->match_mask == 3)); + oidcpy(&ci->merged.result.oid, &ci->stages[side].oid); + ci->merged.result.mode = ci->stages[side].mode; + ci->merged.is_null = is_null_oid(&ci->stages[side].oid); + ci->match_mask = 0; + ci->merged.clean = 1; /* (ci->filemask == 0); */ +} + +static int handle_deferred_entries(struct merge_options *opt, + struct traverse_info *info) +{ + struct rename_info *renames = &opt->priv->renames; + struct hashmap_iter iter; + struct strmap_entry *entry; + int side, ret = 0; + int path_count_before, path_count_after = 0; + + path_count_before = strmap_get_size(&opt->priv->paths); + for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) { + unsigned optimization_okay = 1; + struct strintmap copy; + + /* Loop over the set of paths we need to know rename info for */ + strset_for_each_entry(&renames->relevant_sources[side], + &iter, entry) { + char *rename_target, *dir, *dir_marker; + struct strmap_entry *e; + + /* + * If we don't know delete/rename info for this path, + * then we need to recurse into all trees to get all + * adds to make sure we have it. + */ + if (strset_contains(&renames->cached_irrelevant[side], + entry->key)) + continue; + e = strmap_get_entry(&renames->cached_pairs[side], + entry->key); + if (!e) { + optimization_okay = 0; + break; + } + + /* If this is a delete, we have enough info already */ + rename_target = e->value; + if (!rename_target) + continue; + + /* If we already walked the rename target, we're good */ + if (strmap_contains(&opt->priv->paths, rename_target)) + continue; + + /* + * Otherwise, we need to get a list of directories that + * will need to be recursed into to get this + * rename_target. + */ + dir = xstrdup(rename_target); + while ((dir_marker = strrchr(dir, '/'))) { + *dir_marker = '\0'; + if (strset_contains(&renames->deferred[side].target_dirs, + dir)) + break; + strset_add(&renames->deferred[side].target_dirs, + dir); + } + free(dir); + } + renames->deferred[side].trivial_merges_okay = optimization_okay; + /* + * We need to recurse into any directories in + * possible_trivial_merges[side] found in target_dirs[side]. + * But when we recurse, we may need to queue up some of the + * subdirectories for possible_trivial_merges[side]. Since + * we can't safely iterate through a hashmap while also adding + * entries, move the entries into 'copy', iterate over 'copy', + * and then we'll also iterate anything added into + * possible_trivial_merges[side] once this loop is done. + */ + copy = renames->deferred[side].possible_trivial_merges; + strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges, + 0, + &opt->priv->pool, + 0); + strintmap_for_each_entry(©, &iter, entry) { + const char *path = entry->key; + unsigned dir_rename_mask = (intptr_t)entry->value; + struct conflict_info *ci; + unsigned dirmask; + struct tree_desc t[3]; + void *buf[3] = {NULL,}; + int i; + + ci = strmap_get(&opt->priv->paths, path); + VERIFY_CI(ci); + dirmask = ci->dirmask; + + if (optimization_okay && + !strset_contains(&renames->deferred[side].target_dirs, + path)) { + resolve_trivial_directory_merge(ci, side); + continue; + } + + info->name = path; + info->namelen = strlen(path); + info->pathlen = info->namelen + 1; + + for (i = 0; i < 3; i++, dirmask >>= 1) { + if (i == 1 && ci->match_mask == 3) + t[1] = t[0]; + else if (i == 2 && ci->match_mask == 5) + t[2] = t[0]; + else if (i == 2 && ci->match_mask == 6) + t[2] = t[1]; + else { + const struct object_id *oid = NULL; + if (dirmask & 1) + oid = &ci->stages[i].oid; + buf[i] = fill_tree_descriptor(opt->repo, + t+i, oid); + } + } + + ci->match_mask &= ci->filemask; + opt->priv->current_dir_name = path; + renames->dir_rename_mask = dir_rename_mask; + if (renames->dir_rename_mask == 0 || + renames->dir_rename_mask == 0x07) + ret = traverse_trees(NULL, 3, t, info); + else + ret = traverse_trees_wrapper(NULL, 3, t, info); + + for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) + free(buf[i]); + + if (ret < 0) + return ret; + } + strintmap_clear(©); + strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges, + &iter, entry) { + const char *path = entry->key; + struct conflict_info *ci; + + ci = strmap_get(&opt->priv->paths, path); + VERIFY_CI(ci); + + assert(renames->deferred[side].trivial_merges_okay && + !strset_contains(&renames->deferred[side].target_dirs, + path)); + resolve_trivial_directory_merge(ci, side); + } + if (!optimization_okay || path_count_after) + path_count_after = strmap_get_size(&opt->priv->paths); + } + if (path_count_after) { + /* + * The choice of wanted_factor here does not affect + * correctness, only performance. When the + * path_count_after / path_count_before + * ratio is high, redoing after renames is a big + * performance boost. I suspect that redoing is a wash + * somewhere near a value of 2, and below that redoing will + * slow things down. I applied a fudge factor and picked + * 3; see the commit message when this was introduced for + * back of the envelope calculations for this ratio. + */ + const int wanted_factor = 3; + + /* We should only redo collect_merge_info one time */ + assert(renames->redo_after_renames == 0); + + if (path_count_after / path_count_before >= wanted_factor) { + renames->redo_after_renames = 1; + renames->cached_pairs_valid_side = -1; + } + } else if (renames->redo_after_renames == 2) + renames->redo_after_renames = 0; + return ret; +} + static int collect_merge_info(struct merge_options *opt, struct tree *merge_base, struct tree *side1, @@ -1126,6 +1479,8 @@ static int collect_merge_info(struct merge_options *opt, trace2_region_enter("merge", "traverse_trees", opt->repo); ret = traverse_trees(NULL, 3, t, &info); + if (ret == 0) + ret = handle_deferred_entries(opt, &info); trace2_region_leave("merge", "traverse_trees", opt->repo); return ret; @@ -1157,7 +1512,6 @@ static int find_first_merges(struct repository *repo, xsnprintf(merged_revision, sizeof(merged_revision), "^%s", oid_to_hex(&a->object.oid)); repo_init_revisions(repo, &revs, NULL); - rev_opts.submodule = path; /* FIXME: can't handle linked worktrees in submodules yet */ revs.single_worktree = path != NULL; setup_revisions(ARRAY_SIZE(rev_args)-1, rev_args, &revs, &rev_opts); @@ -1167,7 +1521,7 @@ static int find_first_merges(struct repository *repo, die("revision walk setup failed"); while ((commit = get_revision(&revs)) != NULL) { struct object *o = &(commit->object); - if (in_merge_bases(b, commit)) + if (repo_in_merge_bases(repo, b, commit)) add_object_array(o, NULL, &merges); } reset_revision_walk(); @@ -1182,7 +1536,7 @@ static int find_first_merges(struct repository *repo, contains_another = 0; for (j = 0; j < merges.nr; j++) { struct commit *m2 = (struct commit *) merges.objects[j].item; - if (i != j && in_merge_bases(m2, m1)) { + if (i != j && repo_in_merge_bases(repo, m2, m1)) { contains_another = 1; break; } @@ -1203,10 +1557,12 @@ static int merge_submodule(struct merge_options *opt, const struct object_id *b, struct object_id *result) { + struct repository subrepo; + struct strbuf sb = STRBUF_INIT; + int ret = 0; struct commit *commit_o, *commit_a, *commit_b; int parent_count; struct object_array merges; - struct strbuf sb = STRBUF_INIT; int i; int search = !opt->priv->call_depth; @@ -1222,6 +1578,10 @@ static int merge_submodule(struct merge_options *opt, if (is_null_oid(b)) return 0; + /* + * NEEDSWORK: Remove this when all submodule object accesses are + * through explicitly specified repositores. + */ if (add_submodule_odb(path)) { path_msg(opt, path, 0, _("Failed to merge submodule %s (not checked out)"), @@ -1229,39 +1589,48 @@ static int merge_submodule(struct merge_options *opt, return 0; } - if (!(commit_o = lookup_commit_reference(opt->repo, o)) || - !(commit_a = lookup_commit_reference(opt->repo, a)) || - !(commit_b = lookup_commit_reference(opt->repo, b))) { + if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) { + path_msg(opt, path, 0, + _("Failed to merge submodule %s (not checked out)"), + path); + return 0; + } + + if (!(commit_o = lookup_commit_reference(&subrepo, o)) || + !(commit_a = lookup_commit_reference(&subrepo, a)) || + !(commit_b = lookup_commit_reference(&subrepo, b))) { path_msg(opt, path, 0, _("Failed to merge submodule %s (commits not present)"), path); - return 0; + goto cleanup; } /* check whether both changes are forward */ - if (!in_merge_bases(commit_o, commit_a) || - !in_merge_bases(commit_o, commit_b)) { + if (!repo_in_merge_bases(&subrepo, commit_o, commit_a) || + !repo_in_merge_bases(&subrepo, commit_o, commit_b)) { path_msg(opt, path, 0, _("Failed to merge submodule %s " "(commits don't follow merge-base)"), path); - return 0; + goto cleanup; } /* Case #1: a is contained in b or vice versa */ - if (in_merge_bases(commit_a, commit_b)) { + if (repo_in_merge_bases(&subrepo, commit_a, commit_b)) { oidcpy(result, b); path_msg(opt, path, 1, _("Note: Fast-forwarding submodule %s to %s"), path, oid_to_hex(b)); - return 1; + ret = 1; + goto cleanup; } - if (in_merge_bases(commit_b, commit_a)) { + if (repo_in_merge_bases(&subrepo, commit_b, commit_a)) { oidcpy(result, a); path_msg(opt, path, 1, _("Note: Fast-forwarding submodule %s to %s"), path, oid_to_hex(a)); - return 1; + ret = 1; + goto cleanup; } /* @@ -1273,10 +1642,10 @@ static int merge_submodule(struct merge_options *opt, /* Skip the search if makes no sense to the calling context. */ if (!search) - return 0; + goto cleanup; /* find commit which merges them */ - parent_count = find_first_merges(opt->repo, path, commit_a, commit_b, + parent_count = find_first_merges(&subrepo, path, commit_a, commit_b, &merges); switch (parent_count) { case 0: @@ -1310,7 +1679,9 @@ static int merge_submodule(struct merge_options *opt, } object_array_clear(&merges); - return 0; +cleanup: + repo_clear(&subrepo); + return ret; } static void initialize_attr_index(struct merge_options *opt) @@ -1951,12 +2322,17 @@ static void apply_directory_rename_modifications(struct merge_options *opt, VERIFY_CI(ci); /* Find parent directories missing from opt->priv->paths */ - cur_path = new_path; + cur_path = mem_pool_strdup(&opt->priv->pool, new_path); + free((char*)new_path); + new_path = (char *)cur_path; + while (1) { /* Find the parent directory of cur_path */ char *last_slash = strrchr(cur_path, '/'); if (last_slash) { - parent_name = xstrndup(cur_path, last_slash - cur_path); + parent_name = mem_pool_strndup(&opt->priv->pool, + cur_path, + last_slash - cur_path); } else { parent_name = opt->priv->toplevel_dir; break; @@ -1965,7 +2341,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt, /* Look it up in opt->priv->paths */ entry = strmap_get_entry(&opt->priv->paths, parent_name); if (entry) { - free((char*)parent_name); parent_name = entry->key; /* reuse known pointer */ break; } @@ -1992,13 +2367,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt, parent_name = cur_dir; } - /* - * We are removing old_path from opt->priv->paths. old_path also will - * eventually need to be freed, but it may still be used by e.g. - * ci->pathnames. So, store it in another string-list for now. - */ - string_list_append(&opt->priv->paths_to_free, old_path); - assert(ci->filemask == 2 || ci->filemask == 4); assert(ci->dirmask == 0); strmap_remove(&opt->priv->paths, old_path, 0); @@ -2032,7 +2400,6 @@ static void apply_directory_rename_modifications(struct merge_options *opt, new_ci->stages[index].mode = ci->stages[index].mode; oidcpy(&new_ci->stages[index].oid, &ci->stages[index].oid); - free(ci); ci = new_ci; } @@ -2460,10 +2827,23 @@ static void use_cached_pairs(struct merge_options *opt, if (!new_name) new_name = old_name; + /* + * cached_pairs has *copies* of old_name and new_name, + * because it has to persist across merges. Since + * pool_alloc_filespec() will just re-use the existing + * filenames, which will also get re-used by + * opt->priv->paths if they become renames, and then + * get freed at the end of the merge, that would leave + * the copy in cached_pairs dangling. Avoid this by + * making a copy here. + */ + old_name = mem_pool_strdup(&opt->priv->pool, old_name); + new_name = mem_pool_strdup(&opt->priv->pool, new_name); + /* We don't care about oid/mode, only filenames and status */ - one = alloc_filespec(old_name); - two = alloc_filespec(new_name); - diff_queue(pairs, one, two); + one = pool_alloc_filespec(&opt->priv->pool, old_name); + two = pool_alloc_filespec(&opt->priv->pool, new_name); + pool_diff_queue(&opt->priv->pool, pairs, one, two); pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D'; } } @@ -2538,8 +2918,8 @@ static int compare_pairs(const void *a_, const void *b_) } /* Call diffcore_rename() to update deleted/added pairs into rename pairs */ -static void detect_regular_renames(struct merge_options *opt, - unsigned side_index) +static int detect_regular_renames(struct merge_options *opt, + unsigned side_index) { struct diff_options diff_opts; struct rename_info *renames = &opt->priv->renames; @@ -2552,7 +2932,7 @@ static void detect_regular_renames(struct merge_options *opt, * side had directory renames. */ resolve_diffpair_statuses(&renames->pairs[side_index]); - return; + return 0; } partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]); @@ -2562,7 +2942,7 @@ static void detect_regular_renames(struct merge_options *opt, diff_opts.detect_rename = DIFF_DETECT_RENAME; diff_opts.rename_limit = opt->rename_limit; if (opt->rename_limit <= 0) - diff_opts.rename_limit = 1000; + diff_opts.rename_limit = 7000; diff_opts.rename_score = opt->rename_score; diff_opts.show_rename_progress = opt->show_rename_progress; diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT; @@ -2571,6 +2951,7 @@ static void detect_regular_renames(struct merge_options *opt, diff_queued_diff = renames->pairs[side_index]; trace2_region_enter("diff", "diffcore_rename", opt->repo); diffcore_rename_extended(&diff_opts, + &opt->priv->pool, &renames->relevant_sources[side_index], &renames->dirs_removed[side_index], &renames->dir_rename_count[side_index], @@ -2578,6 +2959,8 @@ static void detect_regular_renames(struct merge_options *opt, trace2_region_leave("diff", "diffcore_rename", opt->repo); resolve_diffpair_statuses(&diff_queued_diff); + if (diff_opts.needed_rename_limit > 0) + renames->redo_after_renames = 0; if (diff_opts.needed_rename_limit > renames->needed_limit) renames->needed_limit = diff_opts.needed_rename_limit; @@ -2587,6 +2970,8 @@ static void detect_regular_renames(struct merge_options *opt, diff_queued_diff.nr = 0; diff_queued_diff.queue = NULL; diff_flush(&diff_opts); + + return 1; } /* @@ -2617,7 +3002,7 @@ static int collect_renames(struct merge_options *opt, if (p->status != 'A' && p->status != 'R') { possibly_cache_new_pair(renames, p, side_index, NULL); - diff_free_filepair(p); + pool_diff_free_filepair(&opt->priv->pool, p); continue; } @@ -2630,7 +3015,7 @@ static int collect_renames(struct merge_options *opt, possibly_cache_new_pair(renames, p, side_index, new_path); if (p->status != 'R' && !new_path) { - diff_free_filepair(p); + pool_diff_free_filepair(&opt->priv->pool, p); continue; } @@ -2676,14 +3061,32 @@ static int detect_and_process_renames(struct merge_options *opt, struct diff_queue_struct combined; struct rename_info *renames = &opt->priv->renames; int need_dir_renames, s, clean = 1; + unsigned detection_run = 0; memset(&combined, 0, sizeof(combined)); if (!possible_renames(renames)) goto cleanup; trace2_region_enter("merge", "regular renames", opt->repo); - detect_regular_renames(opt, MERGE_SIDE1); - detect_regular_renames(opt, MERGE_SIDE2); + detection_run |= detect_regular_renames(opt, MERGE_SIDE1); + detection_run |= detect_regular_renames(opt, MERGE_SIDE2); + if (renames->redo_after_renames && detection_run) { + int i, side; + struct diff_filepair *p; + + /* Cache the renames, we found */ + for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) { + for (i = 0; i < renames->pairs[side].nr; ++i) { + p = renames->pairs[side].queue[i]; + possibly_cache_new_pair(renames, p, side, NULL); + } + } + + /* Restart the merge with the cached renames */ + renames->redo_after_renames = 2; + trace2_region_leave("merge", "regular renames", opt->repo); + goto cleanup; + } use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]); use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]); trace2_region_leave("merge", "regular renames", opt->repo); @@ -2730,7 +3133,7 @@ cleanup: side_pairs = &renames->pairs[s]; for (i = 0; i < side_pairs->nr; ++i) { struct diff_filepair *p = side_pairs->queue[i]; - diff_free_filepair(p); + pool_diff_free_filepair(&opt->priv->pool, p); } } @@ -2743,7 +3146,8 @@ simple_cleanup: if (combined.nr) { int i; for (i = 0; i < combined.nr; i++) - diff_free_filepair(combined.queue[i]); + pool_diff_free_filepair(&opt->priv->pool, + combined.queue[i]); free(combined.queue); } @@ -3217,7 +3621,8 @@ static void process_entry(struct merge_options *opt, * the directory to remain here, so we need to move this * path to some new location. */ - CALLOC_ARRAY(new_ci, 1); + new_ci = mem_pool_calloc(&opt->priv->pool, 1, sizeof(*new_ci)); + /* We don't really want new_ci->merged.result copied, but it'll * be overwritten below so it doesn't matter. We also don't * want any directory mode/oid values copied, but we'll zero @@ -3309,7 +3714,8 @@ static void process_entry(struct merge_options *opt, const char *a_path = NULL, *b_path = NULL; int rename_a = 0, rename_b = 0; - new_ci = xmalloc(sizeof(*new_ci)); + new_ci = mem_pool_alloc(&opt->priv->pool, + sizeof(*new_ci)); if (S_ISREG(a_mode)) rename_a = 1; @@ -3378,17 +3784,8 @@ static void process_entry(struct merge_options *opt, b_path = path; strmap_put(&opt->priv->paths, b_path, new_ci); - if (rename_a && rename_b) { + if (rename_a && rename_b) strmap_remove(&opt->priv->paths, path, 0); - /* - * We removed path from opt->priv->paths. path - * will also eventually need to be freed, but - * it may still be used by e.g. ci->pathnames. - * So, store it in another string-list for now. - */ - string_list_append(&opt->priv->paths_to_free, - path); - } /* * Do special handling for b_path since process_entry() @@ -3665,11 +4062,7 @@ static int checkout(struct merge_options *opt, unpack_opts.quiet = 0; /* FIXME: sequencer might want quiet? */ unpack_opts.verbose_update = (opt->verbosity > 2); unpack_opts.fn = twoway_merge; - if (1/* FIXME: opts->overwrite_ignore*/) { - CALLOC_ARRAY(unpack_opts.dir, 1); - unpack_opts.dir->flags |= DIR_SHOW_IGNORED; - setup_standard_excludes(unpack_opts.dir); - } + unpack_opts.preserve_ignored = 0; /* FIXME: !opts->overwrite_ignore */ parse_tree(prev); init_tree_desc(&trees[0], prev->buffer, prev->size); parse_tree(next); @@ -3677,8 +4070,6 @@ static int checkout(struct merge_options *opt, ret = unpack_trees(2, trees, &unpack_opts); clear_unpack_trees_porcelain(&unpack_opts); - dir_clear(unpack_opts.dir); - FREE_AND_NULL(unpack_opts.dir); return ret; } @@ -3694,6 +4085,21 @@ static int record_conflicted_index_entries(struct merge_options *opt) if (strmap_empty(&opt->priv->conflicted)) return 0; + /* + * We are in a conflicted state. These conflicts might be inside + * sparse-directory entries, so check if any entries are outside + * of the sparse-checkout cone preemptively. + * + * We set original_cache_nr below, but that might change if + * index_name_pos() calls ask for paths within sparse directories. + */ + strmap_for_each_entry(&opt->priv->conflicted, &iter, e) { + if (!path_in_sparse_checkout(e->key, index)) { + ensure_full_index(index); + break; + } + } + /* If any entries have skip_worktree set, we'll have to check 'em out */ state.force = 1; state.quiet = 1; @@ -3701,7 +4107,7 @@ static int record_conflicted_index_entries(struct merge_options *opt) state.istate = index; original_cache_nr = index->cache_nr; - /* Put every entry from paths into plist, then sort */ + /* Append every entry from conflicted into index, then sort */ strmap_for_each_entry(&opt->priv->conflicted, &iter, e) { const char *path = e->key; struct conflict_info *ci = e->value; @@ -3929,6 +4335,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) { struct rename_info *renames; int i; + struct mem_pool *pool = NULL; /* Sanity checks on opt */ trace2_region_enter("merge", "sanity checks", opt->repo); @@ -3994,9 +4401,11 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) /* Initialization of various renames fields */ renames = &opt->priv->renames; + mem_pool_init(&opt->priv->pool, 0); + pool = &opt->priv->pool; for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) { strintmap_init_with_options(&renames->dirs_removed[i], - NOT_RELEVANT, NULL, 0); + NOT_RELEVANT, pool, 0); strmap_init_with_options(&renames->dir_rename_count[i], NULL, 1); strmap_init_with_options(&renames->dir_renames[i], @@ -4010,7 +4419,7 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) */ strintmap_init_with_options(&renames->relevant_sources[i], -1 /* explicitly invalid */, - NULL, 0); + pool, 0); strmap_init_with_options(&renames->cached_pairs[i], NULL, 1); strset_init_with_options(&renames->cached_irrelevant[i], @@ -4018,19 +4427,25 @@ static void merge_start(struct merge_options *opt, struct merge_result *result) strset_init_with_options(&renames->cached_target_names[i], NULL, 0); } + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) { + strintmap_init_with_options(&renames->deferred[i].possible_trivial_merges, + 0, pool, 0); + strset_init_with_options(&renames->deferred[i].target_dirs, + pool, 1); + renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */ + } /* * Although we initialize opt->priv->paths with strdup_strings=0, * that's just to avoid making yet another copy of an allocated * string. Putting the entry into paths means we are taking - * ownership, so we will later free it. paths_to_free is similar. + * ownership, so we will later free it. * * In contrast, conflicted just has a subset of keys from paths, so * we don't want to free those (it'd be a duplicate free). */ - strmap_init_with_options(&opt->priv->paths, NULL, 0); - strmap_init_with_options(&opt->priv->conflicted, NULL, 0); - string_list_init_nodup(&opt->priv->paths_to_free); + strmap_init_with_options(&opt->priv->paths, pool, 0); + strmap_init_with_options(&opt->priv->conflicted, pool, 0); /* * keys & strbufs in output will sometimes need to outlive "paths", @@ -4106,6 +4521,7 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt, opt->subtree_shift); } +redo: trace2_region_enter("merge", "collect_merge_info", opt->repo); if (collect_merge_info(opt, merge_base, side1, side2) != 0) { /* @@ -4125,6 +4541,12 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt, result->clean = detect_and_process_renames(opt, merge_base, side1, side2); trace2_region_leave("merge", "renames", opt->repo); + if (opt->priv->renames.redo_after_renames == 2) { + trace2_region_enter("merge", "reset_maps", opt->repo); + clear_or_reinit_internal_opts(opt->priv, 1); + trace2_region_leave("merge", "reset_maps", opt->repo); + goto redo; + } trace2_region_enter("merge", "process_entries", opt->repo); process_entries(opt, &working_tree_oid); |