/*
 * "Ostensibly Recursive's Twin" merge strategy, or "ort" for short.  Meant
 * as a drop-in replacement for the "recursive" merge strategy, allowing one
 * to replace
 *
 *   git merge [-s recursive]
 *
 * with
 *
 *   git merge -s ort
 *
 * Note: git's parser allows the space between '-s' and its argument to be
 * missing.  (Should I have backronymed "ham", "alsa", "kip", "nap", "alvo",
 * "cale", "peedy", or "ins" instead of "ort"?)
 */

#include "cache.h"
#include "merge-ort.h"

#include "alloc.h"
#include "attr.h"
#include "blob.h"
#include "cache-tree.h"
#include "commit.h"
#include "commit-reach.h"
#include "diff.h"
#include "diffcore.h"
#include "dir.h"
#include "entry.h"
#include "ll-merge.h"
#include "object-store.h"
#include "promisor-remote.h"
#include "revision.h"
#include "strmap.h"
#include "submodule-config.h"
#include "submodule.h"
#include "tree.h"
#include "unpack-trees.h"
#include "xdiff-interface.h"

/*
 * We have many arrays of size 3.  Whenever we have such an array, the
 * indices refer to one of the sides of the three-way merge.  This is so
 * pervasive that the constants 0, 1, and 2 are used in many places in the
 * code (especially in arithmetic operations to find the other side's index
 * or to compute a relevant mask), but sometimes these enum names are used
 * to aid code clarity.
 *
 * See also 'filemask' and 'dirmask' in struct conflict_info; the "ith side"
 * referred to there is one of these three sides.
 */
enum merge_side {
	MERGE_BASE = 0,
	MERGE_SIDE1 = 1,
	MERGE_SIDE2 = 2
};

static unsigned RESULT_INITIALIZED = 0x1abe11ed; /* unlikely accidental value */

/*
 * traversal_callback_data: one saved set of traversal callback arguments
 *
 * traverse_trees_wrapper_callback() stores the mask, dirmask and the three
 * name entries it was called with in one of these, so that
 * traverse_trees_wrapper() can replay the call on the real callback later.
 */
struct traversal_callback_data {
	unsigned long mask;
	unsigned long dirmask;
	struct name_entry names[3];
};

/*
 * deferred_traversal_data: per-side bookkeeping for directories whose
 * traversal may be skipped or postponed (see rename_info.deferred).
 */
struct deferred_traversal_data {
	/*
	 * possible_trivial_merges: directories to be explored only when needed
	 *
	 * possible_trivial_merges is a map of directory names to
	 * dir_rename_mask.  When we detect that a directory is unchanged on
	 * one side, we can sometimes resolve the directory without recursing
	 * into it.  Renames are the only things that can prevent such an
	 * optimization.  However, for rename sources:
	 *   - If no parent directory needed directory rename detection, then
	 *     no path under such a directory can be a relevant_source.
	 * and for rename destinations:
	 *   - If no cached rename has a target path under the directory AND
	 *   - If there are no unpaired relevant_sources elsewhere in the
	 *     repository
	 * then we don't need any path under this directory for a rename
	 * destination.  The only way to know the last item above is to defer
	 * handling such directories until the end of collect_merge_info(),
	 * in handle_deferred_entries().
	 *
	 * For each we store dir_rename_mask, since that's the only bit of
	 * information we need, other than the path, to resume the recursive
	 * traversal.
	 */
	struct strintmap possible_trivial_merges;

	/*
	 * trivial_merges_okay: if trivial directory merges are okay
	 *
	 * See possible_trivial_merges above.  The "no unpaired
	 * relevant_sources elsewhere in the repository" is a single boolean
	 * per merge side, which we store here.  Note that while 0 means no,
	 * 1 only means "maybe" rather than "yes"; we optimistically set it
	 * to 1 initially and only clear when we determine it is unsafe to
	 * do trivial directory merges.
	 */
	unsigned trivial_merges_okay;

	/*
	 * target_dirs: ancestor directories of rename targets
	 *
	 * target_dirs contains all directory names that are an ancestor of
	 * any rename destination.
	 */
	struct strset target_dirs;
};

/*
 * rename_info: all state relating to rename detection for one merge
 * (embedded in struct merge_options_internal as "renames").
 */
struct rename_info {
	/*
	 * All variables that are arrays of size 3 correspond to data tracked
	 * for the sides in enum merge_side.  Index 0 is almost always unused
	 * because we often only need to track information for MERGE_SIDE1 and
	 * MERGE_SIDE2 (MERGE_BASE can't have rename information since renames
	 * are determined relative to what changed since the MERGE_BASE).
	 */

	/*
	 * pairs: pairing of filenames from diffcore_rename()
	 */
	struct diff_queue_struct pairs[3];

	/*
	 * dirs_removed: directories removed on a given side of history.
	 *
	 * The keys of dirs_removed[side] are the directories that were removed
	 * on the given side of history.  The value of the strintmap for each
	 * directory is a value from enum dir_rename_relevance.
	 */
	struct strintmap dirs_removed[3];

	/*
	 * dir_rename_count: tracking where parts of a directory were renamed to
	 *
	 * When files in a directory are renamed, they may not all go to the
	 * same location.  Each strmap here tracks:
	 *      old_dir => {new_dir => int}
	 * That is, dir_rename_count[side] is a strmap to a strintmap.
	 */
	struct strmap dir_rename_count[3];

	/*
	 * dir_renames: computed directory renames
	 *
	 * This is a map of old_dir => new_dir and is derived in part from
	 * dir_rename_count.
	 */
	struct strmap dir_renames[3];

	/*
	 * relevant_sources: deleted paths wanted in rename detection, and why
	 *
	 * relevant_sources is a set of deleted paths on each side of
	 * history for which we need rename detection.  If a path is deleted
	 * on one side of history, we need to detect if it is part of a
	 * rename if either
	 *    * the file is modified/deleted on the other side of history
	 *    * we need to detect renames for an ancestor directory
	 * If neither of those is true, we can skip rename detection for
	 * that path.  The reason is stored as a value from enum
	 * file_rename_relevance, as the reason can inform the algorithm in
	 * diffcore_rename_extended().
	 */
	struct strintmap relevant_sources[3];

	/* deferred: per-side deferred-traversal state; see the struct above */
	struct deferred_traversal_data deferred[3];

	/*
	 * dir_rename_mask:
	 *   0: optimization removing unmodified potential rename source okay
	 *   2 or 4: optimization okay, but must check for files added to dir
	 *   7: optimization forbidden; need rename source in case of dir rename
	 */
	unsigned dir_rename_mask:3;

	/*
	 * callback_data_*: supporting data structures for alternate traversal
	 *
	 * We sometimes need to be able to traverse through all the files
	 * in a given tree before all immediate subdirectories within that
	 * tree.  Since traverse_trees() doesn't do that naturally, we have
	 * a traverse_trees_wrapper() that stores any immediate
	 * subdirectories while traversing files, then traverses the
	 * immediate subdirectories later.  These callback_data* variables
	 * store the information for the subdirectories so that we can do
	 * that traversal order.
	 */
	struct traversal_callback_data *callback_data;
	int callback_data_nr, callback_data_alloc;
	char *callback_data_traverse_path;

	/*
	 * merge_trees: trees passed to the merge algorithm for the merge
	 *
	 * merge_trees records the trees passed to the merge algorithm.  But,
	 * this data also is stored in merge_result->priv.  If a sequence of
	 * merges is being done (such as when cherry-picking or rebasing),
	 * the next merge can look at this and re-use information from
	 * previous merges under certain circumstances.
	 *
	 * See also all the cached_* variables.
	 */
	struct tree *merge_trees[3];

	/*
	 * cached_pairs_valid_side: which side's cached info can be reused
	 *
	 * See the description for merge_trees.  For repeated merges, at most
	 * only one side's cached information can be used.  Valid values:
	 *   MERGE_SIDE2: cached data from side2 can be reused
	 *   MERGE_SIDE1: cached data from side1 can be reused
	 *   0:           no cached data can be reused
	 *   -1:          See redo_after_renames; both sides can be reused.
	 */
	int cached_pairs_valid_side;

	/*
	 * cached_pairs: Caching of renames and deletions.
	 *
	 * These are mappings recording renames and deletions of individual
	 * files (not directories).  They are thus a map from an old
	 * filename to either NULL (for deletions) or a new filename (for
	 * renames).
	 */
	struct strmap cached_pairs[3];

	/*
	 * cached_target_names: just the destinations from cached_pairs
	 *
	 * We sometimes want a fast lookup to determine if a given filename
	 * is one of the destinations in cached_pairs.  cached_target_names
	 * is thus duplicative information, but it provides a fast lookup.
	 */
	struct strset cached_target_names[3];

	/*
	 * cached_irrelevant: Caching of rename_sources that aren't relevant.
	 *
	 * If we try to detect a rename for a source path and succeed, it's
	 * part of a rename.  If we try to detect a rename for a source path
	 * and fail, then it's a delete.  If we do not try to detect a rename
	 * for a path, then we don't know if it's a rename or a delete.  If
	 * merge-ort doesn't think the path is relevant, then we just won't
	 * cache anything for that path.  But there's a slight problem in
	 * that merge-ort can think a path is RELEVANT_LOCATION, but due to
	 * commit 9bd342137e ("diffcore-rename: determine which
	 * relevant_sources are no longer relevant", 2021-03-13),
	 * diffcore-rename can downgrade the path to RELEVANT_NO_MORE.  To
	 * avoid excessive calls to diffcore_rename_extended() we still need
	 * to cache such paths, though we cannot record them as either
	 * renames or deletes.  So we cache them here as a "turned out to be
	 * irrelevant *for this commit*" as they are often also irrelevant
	 * for subsequent commits, though we will have to do some extra
	 * checking to see whether such paths become relevant for rename
	 * detection when cherry-picking/rebasing subsequent commits.
	 */
	struct strset cached_irrelevant[3];

	/*
	 * redo_after_renames: optimization flag for "restarting" the merge
	 *
	 * Sometimes it pays to detect renames, cache them, and then
	 * restart the merge operation from the beginning.  The reason for
	 * this is that when we know where all the renames are, we know
	 * whether a certain directory has any paths under it affected --
	 * and if a directory is not affected then it permits us to do
	 * trivial tree merging in more cases.  Doing trivial tree merging
	 * prevents the need to run process_entry() on every path
	 * underneath trees that can be trivially merged, and
	 * process_entry() is more expensive than collect_merge_info() --
	 * plus, the second collect_merge_info() will be much faster since
	 * it doesn't have to recurse into the relevant trees.
	 *
	 * Values for this flag:
	 *   0 = don't bother, not worth it (or conditions not yet checked)
	 *   1 = conditions for optimization met, optimization worthwhile
	 *   2 = we already did it (don't restart merge yet again)
	 */
	unsigned redo_after_renames;

	/*
	 * needed_limit: value needed for inexact rename detection to run
	 *
	 * If the current rename limit wasn't high enough for inexact
	 * rename detection to run, this records the limit needed.  Otherwise,
	 * this value remains 0.
	 */
	int needed_limit;
};

/*
 * merge_options_internal: private working state of a merge; the functions
 * in this file reach it through opt->priv.
 */
struct merge_options_internal {
	/*
	 * paths: primary data structure in all of merge ort.
	 *
	 * The keys of paths:
	 *   * are full relative paths from the toplevel of the repository
	 *     (e.g. "drivers/firmware/raspberrypi.c").
	 *   * store all relevant paths in the repo, both directories and
	 *     files (e.g. drivers, drivers/firmware would also be included)
	 *   * these keys serve to intern all the path strings, which allows
	 *     us to do pointer comparison on directory names instead of
	 *     strcmp; we just have to be careful to use the interned strings.
	 *
	 * The values of paths:
	 *   * either a pointer to a merged_info, or a conflict_info struct
	 *   * merged_info contains all relevant information for a
	 *     non-conflicted entry.
	 *   * conflict_info contains a merged_info, plus any additional
	 *     information about a conflict such as the higher order stages
	 *     involved and the names of the paths those came from (handy
	 *     once renames get involved).
	 *   * a path may start "conflicted" (i.e. point to a conflict_info)
	 *     and then a later step (e.g. three-way content merge) determines
	 *     it can be cleanly merged, at which point it'll be marked clean
	 *     and the algorithm will ignore any data outside the contained
	 *     merged_info for that entry
	 *   * If an entry remains conflicted, the merged_info portion of a
	 *     conflict_info will later be filled with whatever version of
	 *     the file should be placed in the working directory (e.g. an
	 *     as-merged-as-possible variation that contains conflict markers).
	 */
	struct strmap paths;

	/*
	 * conflicted: a subset of keys->values from "paths"
	 *
	 * conflicted is basically an optimization between process_entries()
	 * and record_conflicted_index_entries(); the latter could loop over
	 * ALL the entries in paths AGAIN and look for the ones that are
	 * still conflicted, but since process_entries() has to loop over
	 * all of them, it saves the ones it couldn't resolve in this strmap
	 * so that record_conflicted_index_entries() can iterate just the
	 * relevant entries.
	 */
	struct strmap conflicted;

	/*
	 * pool: memory pool for fast allocation/deallocation
	 *
	 * We allocate room for lots of filenames and auxiliary data
	 * structures in merge_options_internal, and it tends to all be
	 * freed together too.  Using a memory pool for these provides a
	 * nice speedup.
	 */
	struct mem_pool pool;

	/*
	 * output: special messages and conflict notices for various paths
	 *
	 * This is a map of pathnames (a subset of the keys in "paths" above)
	 * to strbufs.  It gathers various warning/conflict/notice messages
	 * for later processing.
	 */
	struct strmap output;

	/*
	 * renames: various data relating to rename detection
	 */
	struct rename_info renames;

	/*
	 * attr_index: hacky minimal index used for renormalization
	 *
	 * renormalization code _requires_ an index, though it only needs to
	 * find a .gitattributes file within the index.  So, when
	 * renormalization is important, we create a special index with just
	 * that one file.
	 */
	struct index_state attr_index;

	/*
	 * current_dir_name, toplevel_dir: temporary vars
	 *
	 * These are used in collect_merge_info_callback(), and will set the
	 * various merged_info.directory_name for the various paths we get;
	 * see documentation for that variable and the requirements placed on
	 * that field.
	 */
	const char *current_dir_name;
	const char *toplevel_dir;

	/* call_depth: recursion level counter for merging merge bases */
	int call_depth;
};

/* version_info: the object id and file mode of one version of a path */
struct version_info {
	struct object_id oid;
	unsigned short mode;
};

/*
 * merged_info: the resolution recorded for a path.  It is also the first
 * member of struct conflict_info, which is what allows a merged_info
 * pointer to be cast to a conflict_info pointer when clean is 0.
 */
struct merged_info {
	/* if is_null, ignore result.  otherwise result has oid & mode */
	struct version_info result;
	unsigned is_null:1;

	/*
	 * clean: whether the path in question is cleanly merged.
	 *
	 * see conflict_info.merged for more details.
	 */
	unsigned clean:1;

	/*
	 * basename_offset: offset of basename of path.
	 *
	 * perf optimization to avoid recomputing offset of final '/'
	 * character in pathname (0 if no '/' in pathname).
	 */
	size_t basename_offset;

	 /*
	  * directory_name: containing directory name.
	  *
	  * Note that we assume directory_name is constructed such that
	  *    strcmp(dir1_name, dir2_name) == 0 iff dir1_name == dir2_name,
	  * i.e. string equality is equivalent to pointer equality.  For this
	  * to hold, we have to be careful setting directory_name.
	  */
	const char *directory_name;
};

/*
 * conflict_info: a merged_info plus the per-side data needed while a path
 * is still unresolved.
 */
struct conflict_info {
	/*
	 * merged: the version of the path that will be written to working tree
	 *
	 * WARNING: It is critical to check merged.clean and ensure it is 0
	 * before reading any conflict_info fields outside of merged.
	 * Allocated merged_info structs will always have clean set to 1.
	 * Allocated conflict_info structs will have merged.clean set to 0
	 * initially.  The merged.clean field is how we know if it is safe
	 * to access other parts of conflict_info besides merged; if a
	 * conflict_info's merged.clean is changed to 1, the rest of the
	 * algorithm is not allowed to look at anything outside of the
	 * merged member anymore.
	 */
	struct merged_info merged;

	/* oids & modes from each of the three trees for this path */
	struct version_info stages[3];

	/* pathnames for each stage; may differ due to rename detection */
	const char *pathnames[3];

	/* Whether this path is/was involved in a directory/file conflict */
	unsigned df_conflict:1;

	/*
	 * Whether this path is/was involved in a non-content conflict other
	 * than a directory/file conflict (e.g. rename/rename, rename/delete,
	 * file location based on possible directory rename).
	 */
	unsigned path_conflict:1;

	/*
	 * For filemask and dirmask, the ith bit corresponds to whether the
	 * ith entry is a file (filemask) or a directory (dirmask).  Thus,
	 * filemask & dirmask is always zero, and filemask | dirmask is at
	 * most 7 but can be less when a path does not appear as either a
	 * file or a directory on at least one side of history.
	 *
	 * Note that these masks are related to enum merge_side, as the ith
	 * entry corresponds to side i.
	 *
	 * These values come from a traverse_trees() call; more info may be
	 * found looking at tree-walk.h's struct traverse_info,
	 * particularly the documentation above the "fn" member (note that
	 * filemask = mask & ~dirmask from that documentation).
	 */
	unsigned filemask:3;
	unsigned dirmask:3;

	/*
	 * Optimization to track which stages match, to avoid the need to
	 * recompute it in multiple steps.  Either 0 or at least 2 bits are
	 * set; if at least 2 bits are set, their corresponding stages match.
	 */
	unsigned match_mask:3;
};

/*** Function Grouping: various utility functions ***/

/*
 * For the next three macros, see warning for conflict_info.merged.
 *
 * In each of the below, mi is a struct merged_info*, and ci was defined
 * as a struct conflict_info* (but we need to verify ci isn't actually
 * pointed at a struct merged_info*).
 *
 * INITIALIZE_CI: Assign ci to mi but only if it's safe; set to NULL otherwise.
 * VERIFY_CI: Ensure that something we assigned to a conflict_info* is one.
* ASSIGN_AND_VERIFY_CI: Similar to VERIFY_CI but do assignment first. */ #define INITIALIZE_CI(ci, mi) do { \ (ci) = (!(mi) || (mi)->clean) ? NULL : (struct conflict_info *)(mi); \ } while (0) #define VERIFY_CI(ci) assert(ci && !ci->merged.clean); #define ASSIGN_AND_VERIFY_CI(ci, mi) do { \ (ci) = (struct conflict_info *)(mi); \ assert((ci) && !(mi)->clean); \ } while (0) static void free_strmap_strings(struct strmap *map) { struct hashmap_iter iter; struct strmap_entry *entry; strmap_for_each_entry(map, &iter, entry) { free((char*)entry->key); } } static void clear_or_reinit_internal_opts(struct merge_options_internal *opti, int reinitialize) { struct rename_info *renames = &opti->renames; int i; void (*strmap_clear_func)(struct strmap *, int) = reinitialize ? strmap_partial_clear : strmap_clear; void (*strintmap_clear_func)(struct strintmap *) = reinitialize ? strintmap_partial_clear : strintmap_clear; void (*strset_clear_func)(struct strset *) = reinitialize ? strset_partial_clear : strset_clear; strmap_clear_func(&opti->paths, 0); /* * All keys and values in opti->conflicted are a subset of those in * opti->paths. We don't want to deallocate anything twice, so we * don't free the keys and we pass 0 for free_values. 
*/ strmap_clear_func(&opti->conflicted, 0); if (opti->attr_index.cache_nr) /* true iff opt->renormalize */ discard_index(&opti->attr_index); /* Free memory used by various renames maps */ for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { strintmap_clear_func(&renames->dirs_removed[i]); strmap_clear_func(&renames->dir_renames[i], 0); strintmap_clear_func(&renames->relevant_sources[i]); if (!reinitialize) assert(renames->cached_pairs_valid_side == 0); if (i != renames->cached_pairs_valid_side && -1 != renames->cached_pairs_valid_side) { strset_clear_func(&renames->cached_target_names[i]); strmap_clear_func(&renames->cached_pairs[i], 1); strset_clear_func(&renames->cached_irrelevant[i]); partial_clear_dir_rename_count(&renames->dir_rename_count[i]); if (!reinitialize) strmap_clear(&renames->dir_rename_count[i], 1); } } for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) { strintmap_clear_func(&renames->deferred[i].possible_trivial_merges); strset_clear_func(&renames->deferred[i].target_dirs); renames->deferred[i].trivial_merges_okay = 1; /* 1 == maybe */ } renames->cached_pairs_valid_side = 0; renames->dir_rename_mask = 0; if (!reinitialize) { struct hashmap_iter iter; struct strmap_entry *e; /* Release and free each strbuf found in output */ strmap_for_each_entry(&opti->output, &iter, e) { struct strbuf *sb = e->value; strbuf_release(sb); /* * While strictly speaking we don't need to free(sb) * here because we could pass free_values=1 when * calling strmap_clear() on opti->output, that would * require strmap_clear to do another * strmap_for_each_entry() loop, so we just free it * while we're iterating anyway. */ free(sb); } strmap_clear(&opti->output, 0); } mem_pool_discard(&opti->pool, 0); /* Clean out callback_data as well. */ FREE_AND_NULL(renames->callback_data); renames->callback_data_nr = renames->callback_data_alloc = 0; } __attribute__((format (printf, 2, 3))) static int err(struct merge_options *opt, const char *err, ...) 
{ va_list params; struct strbuf sb = STRBUF_INIT; strbuf_addstr(&sb, "error: "); va_start(params, err); strbuf_vaddf(&sb, err, params); va_end(params); error("%s", sb.buf); strbuf_release(&sb); return -1; } static void format_commit(struct strbuf *sb, int indent, struct repository *repo, struct commit *commit) { struct merge_remote_desc *desc; struct pretty_print_context ctx = {0}; ctx.abbrev = DEFAULT_ABBREV; strbuf_addchars(sb, ' ', indent); desc = merge_remote_util(commit); if (desc) { strbuf_addf(sb, "virtual %s\n", desc->name); return; } repo_format_commit_message(repo, commit, "%h %s", sb, &ctx); strbuf_addch(sb, '\n'); } __attribute__((format (printf, 4, 5))) static void path_msg(struct merge_options *opt, const char *path, int omittable_hint, /* skippable under --remerge-diff */ const char *fmt, ...) { va_list ap; struct strbuf *sb = strmap_get(&opt->priv->output, path); if (!sb) { sb = xmalloc(sizeof(*sb)); strbuf_init(sb, 0); strmap_put(&opt->priv->output, path, sb); } va_start(ap, fmt); strbuf_vaddf(sb, fmt, ap); va_end(ap); strbuf_addch(sb, '\n'); } static struct diff_filespec *pool_alloc_filespec(struct mem_pool *pool, const char *path) { /* Similar to alloc_filespec(), but allocate from pool and reuse path */ struct diff_filespec *spec; spec = mem_pool_calloc(pool, 1, sizeof(*spec)); spec->path = (char*)path; /* spec won't modify it */ spec->count = 1; spec->is_binary = -1; return spec; } static struct diff_filepair *pool_diff_queue(struct mem_pool *pool, struct diff_queue_struct *queue, struct diff_filespec *one, struct diff_filespec *two) { /* Same code as diff_queue(), except allocate from pool */ struct diff_filepair *dp; dp = mem_pool_calloc(pool, 1, sizeof(*dp)); dp->one = one; dp->two = two; if (queue) diff_q(queue, dp); return dp; } /* add a string to a strbuf, but converting "/" to "_" */ static void add_flattened_path(struct strbuf *out, const char *s) { size_t i = out->len; strbuf_addstr(out, s); for (; i < out->len; i++) if (out->buf[i] == 
'/') out->buf[i] = '_'; } static char *unique_path(struct strmap *existing_paths, const char *path, const char *branch) { struct strbuf newpath = STRBUF_INIT; int suffix = 0; size_t base_len; strbuf_addf(&newpath, "%s~", path); add_flattened_path(&newpath, branch); base_len = newpath.len; while (strmap_contains(existing_paths, newpath.buf)) { strbuf_setlen(&newpath, base_len); strbuf_addf(&newpath, "_%d", suffix++); } return strbuf_detach(&newpath, NULL); } /*** Function Grouping: functions related to collect_merge_info() ***/ static int traverse_trees_wrapper_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info) { struct merge_options *opt = info->data; struct rename_info *renames = &opt->priv->renames; unsigned filemask = mask & ~dirmask; assert(n==3); if (!renames->callback_data_traverse_path) renames->callback_data_traverse_path = xstrdup(info->traverse_path); if (filemask && filemask == renames->dir_rename_mask) renames->dir_rename_mask = 0x07; ALLOC_GROW(renames->callback_data, renames->callback_data_nr + 1, renames->callback_data_alloc); renames->callback_data[renames->callback_data_nr].mask = mask; renames->callback_data[renames->callback_data_nr].dirmask = dirmask; COPY_ARRAY(renames->callback_data[renames->callback_data_nr].names, names, 3); renames->callback_data_nr++; return mask; } /* * Much like traverse_trees(), BUT: * - read all the tree entries FIRST, saving them * - note that the above step provides an opportunity to compute necessary * additional details before the "real" traversal * - loop through the saved entries and call the original callback on them */ static int traverse_trees_wrapper(struct index_state *istate, int n, struct tree_desc *t, struct traverse_info *info) { int ret, i, old_offset; traverse_callback_t old_fn; char *old_callback_data_traverse_path; struct merge_options *opt = info->data; struct rename_info *renames = &opt->priv->renames; assert(renames->dir_rename_mask == 
2 || renames->dir_rename_mask == 4); old_callback_data_traverse_path = renames->callback_data_traverse_path; old_fn = info->fn; old_offset = renames->callback_data_nr; renames->callback_data_traverse_path = NULL; info->fn = traverse_trees_wrapper_callback; ret = traverse_trees(istate, n, t, info); if (ret < 0) return ret; info->traverse_path = renames->callback_data_traverse_path; info->fn = old_fn; for (i = old_offset; i < renames->callback_data_nr; ++i) { info->fn(n, renames->callback_data[i].mask, renames->callback_data[i].dirmask, renames->callback_data[i].names, info); } renames->callback_data_nr = old_offset; free(renames->callback_data_traverse_path); renames->callback_data_traverse_path = old_callback_data_traverse_path; info->traverse_path = NULL; return 0; } static void setup_path_info(struct merge_options *opt, struct string_list_item *result, const char *current_dir_name, int current_dir_name_len, char *fullpath, /* we'll take over ownership */ struct name_entry *names, struct name_entry *merged_version, unsigned is_null, /* boolean */ unsigned df_conflict, /* boolean */ unsigned filemask, unsigned dirmask, int resolved /* boolean */) { /* result->util is void*, so mi is a convenience typed variable */ struct merged_info *mi; assert(!is_null || resolved); assert(!df_conflict || !resolved); /* df_conflict implies !resolved */ assert(resolved == (merged_version != NULL)); mi = mem_pool_calloc(&opt->priv->pool, 1, resolved ? 
sizeof(struct merged_info) : sizeof(struct conflict_info)); mi->directory_name = current_dir_name; mi->basename_offset = current_dir_name_len; mi->clean = !!resolved; if (resolved) { mi->result.mode = merged_version->mode; oidcpy(&mi->result.oid, &merged_version->oid); mi->is_null = !!is_null; } else { int i; struct conflict_info *ci; ASSIGN_AND_VERIFY_CI(ci, mi); for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) { ci->pathnames[i] = fullpath; ci->stages[i].mode = names[i].mode; oidcpy(&ci->stages[i].oid, &names[i].oid); } ci->filemask = filemask; ci->dirmask = dirmask; ci->df_conflict = !!df_conflict; if (dirmask) /* * Assume is_null for now, but if we have entries * under the directory then when it is complete in * write_completed_directory() it'll update this. * Also, for D/F conflicts, we have to handle the * directory first, then clear this bit and process * the file to see how it is handled -- that occurs * near the top of process_entry(). */ mi->is_null = 1; } strmap_put(&opt->priv->paths, fullpath, mi); result->string = fullpath; result->util = mi; } static void add_pair(struct merge_options *opt, struct name_entry *names, const char *pathname, unsigned side, unsigned is_add /* if false, is_delete */, unsigned match_mask, unsigned dir_rename_mask) { struct diff_filespec *one, *two; struct rename_info *renames = &opt->priv->renames; int names_idx = is_add ? side : 0; if (is_add) { assert(match_mask == 0 || match_mask == 6); if (strset_contains(&renames->cached_target_names[side], pathname)) return; } else { unsigned content_relevant = (match_mask == 0); unsigned location_relevant = (dir_rename_mask == 0x07); assert(match_mask == 0 || match_mask == 3 || match_mask == 5); /* * If pathname is found in cached_irrelevant[side] due to * previous pick but for this commit content is relevant, * then we need to remove it from cached_irrelevant. 
*/ if (content_relevant) /* strset_remove is no-op if strset doesn't have key */ strset_remove(&renames->cached_irrelevant[side], pathname); /* * We do not need to re-detect renames for paths that we already * know the pairing, i.e. for cached_pairs (or * cached_irrelevant). However, handle_deferred_entries() needs * to loop over the union of keys from relevant_sources[side] and * cached_pairs[side], so for simplicity we set relevant_sources * for all the cached_pairs too and then strip them back out in * prune_cached_from_relevant() at the beginning of * detect_regular_renames(). */ if (content_relevant || location_relevant) { /* content_relevant trumps location_relevant */ strintmap_set(&renames->relevant_sources[side], pathname, content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION); } /* * Avoid creating pair if we've already cached rename results. * Note that we do this after setting relevant_sources[side] * as noted in the comment above. */ if (strmap_contains(&renames->cached_pairs[side], pathname) || strset_contains(&renames->cached_irrelevant[side], pathname)) return; } one = pool_alloc_filespec(&opt->priv->pool, pathname); two = pool_alloc_filespec(&opt->priv->pool, pathname); fill_filespec(is_add ? two : one, &names[names_idx].oid, 1, names[names_idx].mode); pool_diff_queue(&opt->priv->pool, &renames->pairs[side], one, two); } static void collect_rename_info(struct merge_options *opt, struct name_entry *names, const char *dirname, const char *fullname, unsigned filemask, unsigned dirmask, unsigned match_mask) { struct rename_info *renames = &opt->priv->renames; unsigned side; /* * Update dir_rename_mask (determines ignore-rename-source validity) * * dir_rename_mask helps us keep track of when directory rename * detection may be relevant. 
Basically, whenver a directory is * removed on one side of history, and a file is added to that * directory on the other side of history, directory rename * detection is relevant (meaning we have to detect renames for all * files within that directory to deduce where the directory * moved). Also, whenever a directory needs directory rename * detection, due to the "majority rules" choice for where to move * it (see t6423 testcase 1f), we also need to detect renames for * all files within subdirectories of that directory as well. * * Here we haven't looked at files within the directory yet, we are * just looking at the directory itself. So, if we aren't yet in * a case where a parent directory needed directory rename detection * (i.e. dir_rename_mask != 0x07), and if the directory was removed * on one side of history, record the mask of the other side of * history in dir_rename_mask. */ if (renames->dir_rename_mask != 0x07 && (dirmask == 3 || dirmask == 5)) { /* simple sanity check */ assert(renames->dir_rename_mask == 0 || renames->dir_rename_mask == (dirmask & ~1)); /* update dir_rename_mask; have it record mask of new side */ renames->dir_rename_mask = (dirmask & ~1); } /* Update dirs_removed, as needed */ if (dirmask == 1 || dirmask == 3 || dirmask == 5) { /* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */ unsigned sides = (0x07 - dirmask)/2; unsigned relevance = (renames->dir_rename_mask == 0x07) ? RELEVANT_FOR_ANCESTOR : NOT_RELEVANT; /* * Record relevance of this directory. However, note that * when collect_merge_info_callback() recurses into this * directory and calls collect_rename_info() on paths * within that directory, if we find a path that was added * to this directory on the other side of history, we will * upgrade this value to RELEVANT_FOR_SELF; see below. 
*/
		if (sides & 1)
			strintmap_set(&renames->dirs_removed[1], fullname,
				      relevance);
		if (sides & 2)
			strintmap_set(&renames->dirs_removed[2], fullname,
				      relevance);
	}

	/*
	 * Here's the block that potentially upgrades to RELEVANT_FOR_SELF.
	 * When we run across a file added to a directory.  In such a case,
	 * find the directory of the file and upgrade its relevance.
	 */
	if (renames->dir_rename_mask == 0x07 &&
	    (filemask == 2 || filemask == 4)) {
		/*
		 * Need directory rename for parent directory on other side
		 * of history from added file.  Thus
		 *    side = (~filemask & 0x06) >> 1
		 * or
		 *    side = 3 - (filemask/2).
		 */
		unsigned side = 3 - (filemask >> 1);
		strintmap_set(&renames->dirs_removed[side], dirname,
			      RELEVANT_FOR_SELF);
	}

	if (filemask == 0 || filemask == 7)
		return;

	for (side = MERGE_SIDE1; side <= MERGE_SIDE2; ++side) {
		unsigned side_mask = (1 << side);

		/* Check for deletion on side */
		if ((filemask & 1) && !(filemask & side_mask))
			add_pair(opt, names, fullname, side, 0 /* delete */,
				 match_mask & filemask,
				 renames->dir_rename_mask);

		/* Check for addition on side */
		if (!(filemask & 1) && (filemask & side_mask))
			add_pair(opt, names, fullname, side, 1 /* add */,
				 match_mask & filemask,
				 renames->dir_rename_mask);
	}
}

/*
 * Per-path callback that collect_merge_info() installs as info->fn and
 * that traverse_trees() invokes for every entry of the three trees.
 *
 * For each path it:
 *   - records the path through setup_path_info(), already marked as
 *     resolved when the three versions make the outcome obvious;
 *   - otherwise feeds the path to collect_rename_info() and records it
 *     as a provisional conflict;
 *   - for directories, either recurses with a nested traverse_trees()
 *     call or queues the directory in possible_trivial_merges so that
 *     handle_deferred_entries() can decide later.
 *
 * Returns mask on success and -1 when a nested traversal fails.
 */
static int collect_merge_info_callback(int n,
				       unsigned long mask,
				       unsigned long dirmask,
				       struct name_entry *names,
				       struct traverse_info *info)
{
	/*
	 * n is 3.  Always.
	 * common ancestor (mbase) has mask 1, and stored in index 0 of names
	 * head of side 1  (side1) has mask 2, and stored in index 1 of names
	 * head of side 2  (side2) has mask 4, and stored in index 2 of names
	 */
	struct merge_options *opt = info->data;
	struct merge_options_internal *opti = opt->priv;
	struct rename_info *renames = &opt->priv->renames;
	struct string_list_item pi;  /* Path Info */
	struct conflict_info *ci; /* typed alias to pi.util (which is void*) */
	struct name_entry *p;
	size_t len;
	char *fullpath;
	const char *dirname = opti->current_dir_name;
	/* Saved so the mask can be restored before returning to the parent */
	unsigned prev_dir_rename_mask = renames->dir_rename_mask;
	/* Bits of mask whose entry is present but is not a directory */
	unsigned filemask = mask & ~dirmask;
	unsigned match_mask = 0; /* will be updated below */
	unsigned mbase_null = !(mask & 1);
	unsigned side1_null = !(mask & 2);
	unsigned side2_null = !(mask & 4);
	unsigned side1_matches_mbase = (!side1_null && !mbase_null &&
					names[0].mode == names[1].mode &&
					oideq(&names[0].oid, &names[1].oid));
	unsigned side2_matches_mbase = (!side2_null && !mbase_null &&
					names[0].mode == names[2].mode &&
					oideq(&names[0].oid, &names[2].oid));
	unsigned sides_match = (!side1_null && !side2_null &&
				names[1].mode == names[2].mode &&
				oideq(&names[1].oid, &names[2].oid));

	/*
	 * Note: When a path is a file on one side of history and a directory
	 * in another, we have a directory/file conflict.  In such cases, if
	 * the conflict doesn't resolve from renames and deletions, then we
	 * always leave directories where they are and move files out of the
	 * way.  Thus, while struct conflict_info has a df_conflict field to
	 * track such conflicts, we ignore that field for any directories at
	 * a path and only pay attention to it for files at the given path.
	 * The fact that we leave directories where they are also means that
	 * we do not need to worry about getting additional df_conflict
	 * information propagated from parent directories down to children
	 * (unlike, say traverse_trees_recursive() in unpack-trees.c, which
	 * sets a newinfo.df_conflicts field specifically to propagate it).
	 */
	unsigned df_conflict = (filemask != 0) && (dirmask != 0);

	/* n = 3 is a fundamental assumption. */
	if (n != 3)
		BUG("Called collect_merge_info_callback wrong");

	/*
	 * A bunch of sanity checks verifying that traverse_trees() calls
	 * us the way I expect.  Could just remove these at some point,
	 * though maybe they are helpful to future code readers.
	 */
	assert(mbase_null == is_null_oid(&names[0].oid));
	assert(side1_null == is_null_oid(&names[1].oid));
	assert(side2_null == is_null_oid(&names[2].oid));
	assert(!mbase_null || !side1_null || !side2_null);
	assert(mask > 0 && mask < 8);

	/*
	 * Determine match_mask: a bitmask (same bit layout as mask) of the
	 * versions that are identical to each other; 0 when none match.
	 */
	if (side1_matches_mbase)
		match_mask = (side2_matches_mbase ? 7 : 3);
	else if (side2_matches_mbase)
		match_mask = 5;
	else if (sides_match)
		match_mask = 6;

	/*
	 * Get the name of the relevant filepath, which we'll pass to
	 * setup_path_info() for tracking.
	 */
	p = names;
	/* Skip absent entries; the asserts above guarantee one is present */
	while (!p->mode)
		p++;
	len = traverse_path_len(info, p->pathlen);

	/* +1 in both of the following lines to include the NUL byte */
	fullpath = mem_pool_alloc(&opt->priv->pool, len + 1);
	make_traverse_path(fullpath, len + 1, info, p->path, p->pathlen);

	/*
	 * If mbase, side1, and side2 all match, we can resolve early.  Even
	 * if these are trees, there will be no renames or anything
	 * underneath.
	 */
	if (side1_matches_mbase && side2_matches_mbase) {
		/* mbase, side1, & side2 all match; use mbase as resolution */
		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
				names, names+0, mbase_null, 0 /* df_conflict */,
				filemask, dirmask, 1 /* resolved */);
		return mask;
	}

	/*
	 * If the sides match, and all three paths are present and are
	 * files, then we can take either as the resolution.  We can't do
	 * this with trees, because there may be rename sources from the
	 * merge_base.
	 */
	if (sides_match && filemask == 0x07) {
		/* use side1 (== side2) version as resolution */
		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
				names, names+1, side1_null, 0,
				filemask, dirmask, 1);
		return mask;
	}

	/*
	 * If side1 matches mbase and all three paths are present and are
	 * files, then we can use side2 as the resolution.  We cannot
	 * necessarily do this for trees, because there may be rename
	 * destinations within side2.
	 */
	if (side1_matches_mbase && filemask == 0x07) {
		/* use side2 version as resolution */
		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
				names, names+2, side2_null, 0,
				filemask, dirmask, 1);
		return mask;
	}

	/* Similar to above but swapping sides 1 and 2 */
	if (side2_matches_mbase && filemask == 0x07) {
		/* use side1 version as resolution */
		setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
				names, names+1, side1_null, 0,
				filemask, dirmask, 1);
		return mask;
	}

	/*
	 * Sometimes we can tell that a source path need not be included in
	 * rename detection -- namely, whenever either
	 *    side1_matches_mbase && side2_null
	 * or
	 *    side2_matches_mbase && side1_null
	 * However, we call collect_rename_info() even in those cases,
	 * because exact renames are cheap and would let us remove both a
	 * source and destination path.  We'll cull the unneeded sources
	 * later.
	 */
	collect_rename_info(opt, names, dirname, fullpath,
			    filemask, dirmask, match_mask);

	/*
	 * None of the special cases above matched, so we have a
	 * provisional conflict.  (Rename detection might allow us to
	 * unconflict some more cases, but that comes later so all we can
	 * do now is record the different non-null file hashes.)
	 */
	setup_path_info(opt, &pi, dirname, info->pathlen, fullpath,
			names, NULL, 0, df_conflict, filemask, dirmask, 0);

	ci = pi.util;
	VERIFY_CI(ci);
	ci->match_mask = match_mask;

	/* If dirmask, recurse into subdirectories */
	if (dirmask) {
		struct traverse_info newinfo;
		struct tree_desc t[3];
		/* Tree buffers returned by fill_tree_descriptor(); freed below */
		void *buf[3] = {NULL, NULL, NULL};
		const char *original_dir_name;
		int i, ret, side;

		/*
		 * Check for whether we can avoid recursing due to one side
		 * matching the merge base.  The side that does NOT match is
		 * the one that might have a rename destination we need.
		 */
		assert(!side1_matches_mbase || !side2_matches_mbase);
		side = side1_matches_mbase ? MERGE_SIDE2 :
			side2_matches_mbase ? MERGE_SIDE1 : MERGE_BASE;
		if (filemask == 0 && (dirmask == 2 || dirmask == 4)) {
			/*
			 * Also defer recursing into new directories; set up a
			 * few variables to let us do so.
			 */
			ci->match_mask = (7 - dirmask);
			side = dirmask / 2;
		}
		/*
		 * Defer this directory instead of recursing: remember the
		 * current dir_rename_mask so handle_deferred_entries() can
		 * resume the traversal later if it turns out to be needed.
		 */
		if (renames->dir_rename_mask != 0x07 &&
		    side != MERGE_BASE &&
		    renames->deferred[side].trivial_merges_okay &&
		    !strset_contains(&renames->deferred[side].target_dirs,
				     pi.string)) {
			strintmap_set(&renames->deferred[side].possible_trivial_merges,
				      pi.string, renames->dir_rename_mask);
			renames->dir_rename_mask = prev_dir_rename_mask;
			return mask;
		}

		/* We need to recurse */
		ci->match_mask &= filemask;
		newinfo = *info;
		newinfo.prev = info;
		newinfo.name = p->path;
		newinfo.namelen = p->pathlen;
		newinfo.pathlen = st_add3(newinfo.pathlen, p->pathlen, 1);
		/*
		 * If this directory we are about to recurse into cared about
		 * its parent directory (the current directory) having a D/F
		 * conflict, then we'd propagate the masks in this way:
		 *    newinfo.df_conflicts |= (mask & ~dirmask);
		 * But we don't worry about propagating D/F conflicts.  (See
		 * comment near setting of local df_conflict variable near
		 * the beginning of this function).
		 */

		/*
		 * Set up one tree descriptor per side.  When two sides are
		 * known to hold the same tree, the descriptor is copied
		 * rather than filled a second time.
		 */
		for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) {
			if (i == 1 && side1_matches_mbase)
				t[1] = t[0];
			else if (i == 2 && side2_matches_mbase)
				t[2] = t[0];
			else if (i == 2 && sides_match)
				t[2] = t[1];
			else {
				const struct object_id *oid = NULL;
				if (dirmask & 1)
					oid = &names[i].oid;
				buf[i] = fill_tree_descriptor(opt->repo,
							      t + i, oid);
			}
			dirmask >>= 1;
		}

		/* Traverse the subdirectory with it as the current directory */
		original_dir_name = opti->current_dir_name;
		opti->current_dir_name = pi.string;
		if (renames->dir_rename_mask == 0 ||
		    renames->dir_rename_mask == 0x07)
			ret = traverse_trees(NULL, 3, t, &newinfo);
		else
			ret = traverse_trees_wrapper(NULL, 3, t, &newinfo);
		opti->current_dir_name = original_dir_name;
		renames->dir_rename_mask = prev_dir_rename_mask;

		for (i = MERGE_BASE; i <= MERGE_SIDE2; i++)
			free(buf[i]);

		if (ret < 0)
			return -1;
	}

	return mask;
}

/*
 * Resolve a deferred directory without recursing into it: take the
 * version found on `side` as the merge result and mark the entry clean.
 *
 * The assert documents the expected state: for side 1 the match_mask must
 * be 5 and for side 2 it must be 3, i.e. the values the callback above
 * stores when the other side matches the merge base or when the directory
 * exists only on `side`.
 */
static void resolve_trivial_directory_merge(struct conflict_info *ci, int side)
{
	VERIFY_CI(ci);
	assert((side == 1 && ci->match_mask == 5) ||
	       (side == 2 && ci->match_mask == 3));
	oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
	ci->merged.result.mode = ci->stages[side].mode;
	ci->merged.is_null = is_null_oid(&ci->stages[side].oid);
	ci->match_mask = 0;
	ci->merged.clean = 1; /* (ci->filemask == 0); */
}

/*
 * Process the directories that collect_merge_info_callback() queued in
 * possible_trivial_merges for each side: decide whether the trivial
 * directory resolution is still allowed, then either resolve each queued
 * directory trivially or resume the traversal into it.
 *
 * Returns 0 on success or the negative value reported by a failed
 * traversal.
 */
static int handle_deferred_entries(struct merge_options *opt,
				   struct traverse_info *info)
{
	struct rename_info *renames = &opt->priv->renames;
	struct hashmap_iter iter;
	struct strmap_entry *entry;
	int side, ret = 0;
	int path_count_before, path_count_after = 0;

	path_count_before = strmap_get_size(&opt->priv->paths);
	for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
		unsigned optimization_okay = 1;
		struct strintmap copy;

		/* Loop over the set of paths we need to know rename info for */
		strset_for_each_entry(&renames->relevant_sources[side],
				      &iter, entry) {
			char *rename_target, *dir, *dir_marker;
			struct strmap_entry *e;

			/*
			 * If we don't know delete/rename info for this path,
			 * then we need to recurse into all trees to get all
			 * adds to make sure we have it.
*/ if (strset_contains(&renames->cached_irrelevant[side], entry->key)) continue; e = strmap_get_entry(&renames->cached_pairs[side], entry->key); if (!e) { optimization_okay = 0; break; } /* If this is a delete, we have enough info already */ rename_target = e->value; if (!rename_target) continue; /* If we already walked the rename target, we're good */ if (strmap_contains(&opt->priv->paths, rename_target)) continue; /* * Otherwise, we need to get a list of directories that * will need to be recursed into to get this * rename_target. */ dir = xstrdup(rename_target); while ((dir_marker = strrchr(dir, '/'))) { *dir_marker = '\0'; if (strset_contains(&renames->deferred[side].target_dirs, dir)) break; strset_add(&renames->deferred[side].target_dirs, dir); } free(dir); } renames->deferred[side].trivial_merges_okay = optimization_okay; /* * We need to recurse into any directories in * possible_trivial_merges[side] found in target_dirs[side]. * But when we recurse, we may need to queue up some of the * subdirectories for possible_trivial_merges[side]. Since * we can't safely iterate through a hashmap while also adding * entries, move the entries into 'copy', iterate over 'copy', * and then we'll also iterate anything added into * possible_trivial_merges[side] once this loop is done. 
*/ copy = renames->deferred[side].possible_trivial_merges; strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges, 0, &opt->priv->pool, 0); strintmap_for_each_entry(©, &iter, entry) { const char *path = entry->key; unsigned dir_rename_mask = (intptr_t)entry->value; struct conflict_info *ci; unsigned dirmask; struct tree_desc t[3]; void *buf[3] = {NULL,}; int i; ci = strmap_get(&opt->priv->paths, path); VERIFY_CI(ci); dirmask = ci->dirmask; if (optimization_okay && !strset_contains(&renames->deferred[side].target_dirs, path)) { resolve_trivial_directory_merge(ci, side); continue; } info->name = path; info->namelen = strlen(path); info->pathlen = info->namelen + 1; for (i = 0; i < 3; i++, dirmask >>= 1) { if (i == 1 && ci->match_mask == 3) t[1] = t[0]; else if (i == 2 && ci->match_mask == 5) t[2] = t[0]; else if (i == 2 && ci->match_mask == 6) t[2] = t[1]; else { const struct object_id *oid = NULL; if (dirmask & 1) oid = &ci->stages[i].oid; buf[i] = fill_tree_descriptor(opt->repo, t+i, oid); } } ci->match_mask &= ci->filemask; opt->priv->current_dir_name = path; renames->dir_rename_mask = dir_rename_mask; if (renames->dir_rename_mask == 0 || renames->dir_rename_mask == 0x07) ret = traverse_trees(NULL, 3, t, info); else ret = traverse_trees_wrapper(NULL, 3, t, info); for (i = MERGE_BASE; i <= MERGE_SIDE2; i++) free(buf[i]); if (ret < 0) return ret; } strintmap_clear(©); strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges, &iter, entry) { const char *path = entry->key; struct conflict_info *ci; ci = strmap_get(&opt->priv->paths, path); VERIFY_CI(ci); assert(renames->deferred[side].trivial_merges_okay && !strset_contains(&renames->deferred[side].target_dirs, path)); resolve_trivial_directory_merge(ci, side); } if (!optimization_okay || path_count_after) path_count_after = strmap_get_size(&opt->priv->paths); } if (path_count_after) { /* * The choice of wanted_factor here does not affect * correctness, only performance. 
When the * path_count_after / path_count_before * ratio is high, redoing after renames is a big * performance boost. I suspect that redoing is a wash * somewhere near a value of 2, and below that redoing will * slow things down. I applied a fudge factor and picked * 3; see the commit message when this was introduced for * back of the envelope calculations for this ratio. */ const int wanted_factor = 3; /* We should only redo collect_merge_info one time */ assert(renames->redo_after_renames == 0); if (path_count_after / path_count_before >= wanted_factor) { renames->redo_after_renames = 1; renames->cached_pairs_valid_side = -1; } } else if (renames->redo_after_renames == 2) renames->redo_after_renames = 0; return ret; } static int collect_merge_info(struct merge_options *opt, struct tree *merge_base, struct tree *side1, struct tree *side2) { int ret; struct tree_desc t[3]; struct traverse_info info; opt->priv->toplevel_dir = ""; opt->priv->current_dir_name = opt->priv->toplevel_dir; setup_traverse_info(&info, opt->priv->toplevel_dir); info.fn = collect_merge_info_callback; info.data = opt; info.show_all_errors = 1; parse_tree(merge_base); parse_tree(side1); parse_tree(side2); init_tree_desc(t + 0, merge_base->buffer, merge_base->size); init_tree_desc(t + 1, side1->buffer, side1->size); init_tree_desc(t + 2, side2->buffer, side2->size); trace2_region_enter("merge", "traverse_trees", opt->repo); ret = traverse_trees(NULL, 3, t, &info); if (ret == 0) ret = handle_deferred_entries(opt, &info); trace2_region_leave("merge", "traverse_trees", opt->repo); return ret; } /*** Function Grouping: functions related to threeway content merges ***/ static int find_first_merges(struct repository *repo, const char *path, struct commit *a, struct commit *b, struct object_array *result) { int i, j; struct object_array merges = OBJECT_ARRAY_INIT; struct commit *commit; int contains_another; char merged_revision[GIT_MAX_HEXSZ + 2]; const char *rev_args[] = { "rev-list", "--merges", 
"--ancestry-path", "--all", merged_revision, NULL };
	struct rev_info revs;
	struct setup_revision_opt rev_opts;

	/* Start from an empty result and default revision-setup options */
	memset(result, 0, sizeof(struct object_array));
	memset(&rev_opts, 0, sizeof(rev_opts));

	/* get all revisions that merge commit a */
	xsnprintf(merged_revision, sizeof(merged_revision), "^%s",
		  oid_to_hex(&a->object.oid));
	repo_init_revisions(repo, &revs, NULL);
	/* FIXME: can't handle linked worktrees in submodules yet */
	revs.single_worktree = path != NULL;
	/* -1: the terminating NULL of rev_args is not an argument */
	setup_revisions(ARRAY_SIZE(rev_args)-1, rev_args, &revs, &rev_opts);

	/* save all revisions from the above list that contain b */
	if (prepare_revision_walk(&revs))
		die("revision walk setup failed");
	while ((commit = get_revision(&revs)) != NULL) {
		struct object *o = &(commit->object);
		if (repo_in_merge_bases(repo, b, commit))
			add_object_array(o, NULL, &merges);
	}
	reset_revision_walk();

	/*
	 * Now we've got all merges that contain a and b.  Prune all
	 * merges that contain another found merge and save them in
	 * result.
	 */
	for (i = 0; i < merges.nr; i++) {
		struct commit *m1 = (struct commit *) merges.objects[i].item;

		contains_another = 0;
		for (j = 0; j < merges.nr; j++) {
			struct commit *m2 = (struct commit *) merges.objects[j].item;
			if (i != j && repo_in_merge_bases(repo, m2, m1)) {
				contains_another = 1;
				break;
			}
		}

		if (!contains_another)
			add_object_array(merges.objects[i].item, NULL, result);
	}

	object_array_clear(&merges);
	return result->nr;
}

/*
 * Try to merge the submodule at `path`, where `o`, `a` and `b` are the
 * submodule commits recorded by the merge base and the two sides.
 *
 * `result` is first set to a fallback value and is only overwritten when
 * one side can be fast-forwarded to the other.  Returns 1 when such a
 * fast-forward resolved the merge and 0 in every other case; in the
 * latter cases an explanatory message is recorded via path_msg() except
 * when one of the object ids is null.
 */
static int merge_submodule(struct merge_options *opt,
			   const char *path,
			   const struct object_id *o,
			   const struct object_id *a,
			   const struct object_id *b,
			   struct object_id *result)
{
	struct repository subrepo;
	struct strbuf sb = STRBUF_INIT;
	int ret = 0;
	struct commit *commit_o, *commit_a, *commit_b;
	int parent_count;
	struct object_array merges;

	int i;
	/* Candidate merges are only searched for when call_depth is zero */
	int search = !opt->priv->call_depth;

	/* store fallback answer in result in case we fail */
	oidcpy(result, opt->priv->call_depth ?
o : a);

	/* we can not handle deletion conflicts */
	if (is_null_oid(o))
		return 0;
	if (is_null_oid(a))
		return 0;
	if (is_null_oid(b))
		return 0;

	/* No "goto cleanup" here: subrepo was not successfully initialized */
	if (repo_submodule_init(&subrepo, opt->repo, path, null_oid())) {
		path_msg(opt, path, 0,
			 _("Failed to merge submodule %s (not checked out)"),
			 path);
		return 0;
	}

	if (!(commit_o = lookup_commit_reference(&subrepo, o)) ||
	    !(commit_a = lookup_commit_reference(&subrepo, a)) ||
	    !(commit_b = lookup_commit_reference(&subrepo, b))) {
		path_msg(opt, path, 0,
			 _("Failed to merge submodule %s (commits not present)"),
			 path);
		goto cleanup;
	}

	/* check whether both changes are forward */
	if (!repo_in_merge_bases(&subrepo, commit_o, commit_a) ||
	    !repo_in_merge_bases(&subrepo, commit_o, commit_b)) {
		path_msg(opt, path, 0,
			 _("Failed to merge submodule %s "
			   "(commits don't follow merge-base)"),
			 path);
		goto cleanup;
	}

	/* Case #1: a is contained in b or vice versa */
	if (repo_in_merge_bases(&subrepo, commit_a, commit_b)) {
		oidcpy(result, b);
		path_msg(opt, path, 1,
			 _("Note: Fast-forwarding submodule %s to %s"),
			 path, oid_to_hex(b));
		ret = 1;
		goto cleanup;
	}
	if (repo_in_merge_bases(&subrepo, commit_b, commit_a)) {
		oidcpy(result, a);
		path_msg(opt, path, 1,
			 _("Note: Fast-forwarding submodule %s to %s"),
			 path, oid_to_hex(a));
		ret = 1;
		goto cleanup;
	}

	/*
	 * Case #2: There are one or more merges that contain a and b in
	 * the submodule.  If there is only one, then present it as a
	 * suggestion to the user, but leave it marked unmerged so the
	 * user needs to confirm the resolution.
	 */

	/* Skip the search if it makes no sense in the calling context. */
	if (!search)
		goto cleanup;

	/* find commit which merges them */
	parent_count = find_first_merges(&subrepo, path, commit_a, commit_b,
					 &merges);
	/* ret stays 0 in every case below: these are only suggestions */
	switch (parent_count) {
	case 0:
		path_msg(opt, path, 0, _("Failed to merge submodule %s"), path);
		break;

	case 1:
		format_commit(&sb, 4, &subrepo,
			      (struct commit *)merges.objects[0].item);
		path_msg(opt, path, 0,
			 _("Failed to merge submodule %s, but a possible merge "
			   "resolution exists:\n%s\n"),
			 path, sb.buf);
		path_msg(opt, path, 1,
			 _("If this is correct simply add it to the index "
			   "for example\n"
			   "by using:\n\n"
			   " git update-index --cacheinfo 160000 %s \"%s\"\n\n"
			   "which will accept this suggestion.\n"),
			 oid_to_hex(&merges.objects[0].item->oid), path);
		strbuf_release(&sb);
		break;
	default:
		for (i = 0; i < merges.nr; i++)
			format_commit(&sb, 4, &subrepo,
				      (struct commit *)merges.objects[i].item);
		path_msg(opt, path, 0,
			 _("Failed to merge submodule %s, but multiple "
			   "possible merges exist:\n%s"), path, sb.buf);
		strbuf_release(&sb);
	}

	object_array_clear(&merges);
cleanup:
	repo_clear(&subrepo);
	return ret;
}

/*
 * Populate opt->priv->attr_index with the GITATTRIBUTES_FILE entry (or
 * entries) known to the merge, and mark it initialized.  Nothing besides
 * the initialized flag is set up unless opt->renormalize is set and the
 * merge has recorded a path named GITATTRIBUTES_FILE.
 */
static void initialize_attr_index(struct merge_options *opt)
{
	/*
	 * The renormalize_buffer() functions require attributes, and
	 * annoyingly those can only be read from the working tree or from
	 * an index_state.  merge-ort doesn't have an index_state, so we
	 * generate a fake one containing only attribute information.
*/
	struct merged_info *mi;
	struct index_state *attr_index = &opt->priv->attr_index;
	struct cache_entry *ce;

	/*
	 * Set the flag first so the early returns below also count as
	 * "initialized"; merge_3way() tests this flag before calling us.
	 */
	attr_index->initialized = 1;

	if (!opt->renormalize)
		return;

	mi = strmap_get(&opt->priv->paths, GITATTRIBUTES_FILE);
	if (!mi)
		return;

	if (mi->clean) {
		/* Cleanly merged: a single entry holding the merge result */
		int len = strlen(GITATTRIBUTES_FILE);
		ce = make_empty_cache_entry(attr_index, len);
		ce->ce_mode = create_ce_mode(mi->result.mode);
		ce->ce_flags = create_ce_flags(0);
		ce->ce_namelen = len;
		oidcpy(&ce->oid, &mi->result.oid);
		memcpy(ce->name, GITATTRIBUTES_FILE, len);
		add_index_entry(attr_index, ce,
				ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
		get_stream_filter(attr_index, GITATTRIBUTES_FILE, &ce->oid);
	} else {
		/* Not clean: add one entry per version present in filemask */
		int stage, len;
		struct conflict_info *ci;

		ASSIGN_AND_VERIFY_CI(ci, mi);
		for (stage = 0; stage < 3; stage++) {
			unsigned stage_mask = (1 << stage);

			if (!(ci->filemask & stage_mask))
				continue;
			len = strlen(GITATTRIBUTES_FILE);
			ce = make_empty_cache_entry(attr_index, len);
			ce->ce_mode = create_ce_mode(ci->stages[stage].mode);
			ce->ce_flags = create_ce_flags(stage);
			ce->ce_namelen = len;
			oidcpy(&ce->oid, &ci->stages[stage].oid);
			memcpy(ce->name, GITATTRIBUTES_FILE, len);
			add_index_entry(attr_index, ce,
					ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
			get_stream_filter(attr_index, GITATTRIBUTES_FILE,
					  &ce->oid);
		}
	}
}

/*
 * Run ll_merge() on the blobs `o` (ancestor), `a` and `b`, storing the
 * merged content in `result_buf`.  `path` and the attribute index are
 * passed through to ll_merge(); `pathnames` supplies the per-version
 * names used to build the labels handed to ll_merge().
 *
 * Returns the status reported by ll_merge().
 */
static int merge_3way(struct merge_options *opt,
		      const char *path,
		      const struct object_id *o,
		      const struct object_id *a,
		      const struct object_id *b,
		      const char *pathnames[3],
		      const int extra_marker_size,
		      mmbuffer_t *result_buf)
{
	mmfile_t orig, src1, src2;
	struct ll_merge_options ll_opts = {0};
	char *base, *name1, *name2;
	int merge_status;

	/* The attribute index is built lazily, on the first content merge */
	if (!opt->priv->attr_index.initialized)
		initialize_attr_index(opt);

	ll_opts.renormalize = opt->renormalize;
	ll_opts.extra_marker_size = extra_marker_size;
	ll_opts.xdl_opts = opt->xdl_opts;

	if (opt->priv->call_depth) {
		/* Non-zero call_depth: no favoring of either side */
		ll_opts.virtual_ancestor = 1;
		ll_opts.variant = 0;
	} else {
		switch (opt->recursive_variant) {
		case MERGE_VARIANT_OURS:
			ll_opts.variant =
XDL_MERGE_FAVOR_OURS;
			break;
		case MERGE_VARIANT_THEIRS:
			ll_opts.variant = XDL_MERGE_FAVOR_THEIRS;
			break;
		default:
			ll_opts.variant = 0;
			break;
		}
	}

	assert(pathnames[0] && pathnames[1] && pathnames[2] && opt->ancestor);
	/*
	 * Build the labels passed to ll_merge().  Note that the three
	 * pathnames are compared as pointers, not as strings: only when all
	 * three are the very same pointer are the bare ancestor/branch
	 * names used; otherwise each label is "<name>:<path>".
	 */
	if (pathnames[0] == pathnames[1] && pathnames[1] == pathnames[2]) {
		base  = mkpathdup("%s", opt->ancestor);
		name1 = mkpathdup("%s", opt->branch1);
		name2 = mkpathdup("%s", opt->branch2);
	} else {
		base  = mkpathdup("%s:%s", opt->ancestor, pathnames[0]);
		name1 = mkpathdup("%s:%s", opt->branch1,  pathnames[1]);
		name2 = mkpathdup("%s:%s", opt->branch2,  pathnames[2]);
	}

	read_mmblob(&orig, o);
	read_mmblob(&src1, a);
	read_mmblob(&src2, b);

	merge_status = ll_merge(result_buf, path, &orig, base,
				&src1, name1, &src2, name2,
				&opt->priv->attr_index, &ll_opts);

	/* Labels and blob buffers are only needed for the ll_merge() call */
	free(base);
	free(name1);
	free(name2);
	free(orig.ptr);
	free(src1.ptr);
	free(src2.ptr);
	return merge_status;
}

/*
 * Merge the versions `o` (merge base), `a` and `b` of one path into
 * `result`, handling the mode and the content (regular file, submodule
 * or symlink) separately.
 *
 * Returns 1 when the merge was clean, 0 when it was not, and -1 when the
 * internal merge could not be executed or its result could not be
 * written to the object database.
 */
static int handle_content_merge(struct merge_options *opt,
				const char *path,
				const struct version_info *o,
				const struct version_info *a,
				const struct version_info *b,
				const char *pathnames[3],
				const int extra_marker_size,
				struct version_info *result)
{
	/*
	 * path is the target location where we want to put the file, and
	 * is used to determine any normalization rules in ll_merge.
	 *
	 * The normal case is that path and all entries in pathnames are
	 * identical, though renames can affect which path we got one of
	 * the three blobs to merge on various sides of history.
	 *
	 * extra_marker_size is the amount to extend conflict markers in
	 * ll_merge; this is needed if we have content merges of content
	 * merges, which happens for example with rename/rename(2to1) and
	 * rename/add conflicts.
	 */
	unsigned clean = 1;

	/*
	 * handle_content_merge() needs both files to be of the same type, i.e.
	 * both files OR both submodules OR both symlinks.  Conflicting types
	 * needs to be handled elsewhere.
*/
	assert((S_IFMT & a->mode) == (S_IFMT & b->mode));

	/* Merge modes */
	if (a->mode == b->mode || a->mode == o->mode)
		/* Sides agree, or only b changed the mode: b's mode wins */
		result->mode = b->mode;
	else {
		/* must be the 100644/100755 case */
		assert(S_ISREG(a->mode));
		result->mode = a->mode;
		/* Clean only if b left the mode untouched */
		clean = (b->mode == o->mode);
		/*
		 * FIXME: If opt->priv->call_depth && !clean, then we really
		 * should not make result->mode match either a->mode or
		 * b->mode; that causes t6036 "check conflicting mode for
		 * regular file" to fail.  It would be best to use some other
		 * mode, but we'll confuse all kinds of stuff if we use one
		 * where S_ISREG(result->mode) isn't true, and if we use
		 * something like 0100666, then tree-walk.c's calls to
		 * canon_mode() will just normalize that to 100644 for us and
		 * thus not solve anything.
		 *
		 * Figure out if there's some kind of way we can work around
		 * this...
		 */
	}

	/*
	 * Trivial oid merge.
	 *
	 * Note: While one might assume that the next four lines would
	 * be unnecessary due to the fact that match_mask is often
	 * setup and already handled, renames don't always take care
	 * of that.
	 */
	if (oideq(&a->oid, &b->oid) || oideq(&a->oid, &o->oid))
		oidcpy(&result->oid, &b->oid);
	else if (oideq(&b->oid, &o->oid))
		oidcpy(&result->oid, &a->oid);

	/* Remaining rules depend on file vs. submodule vs. symlink. */
	else if (S_ISREG(a->mode)) {
		mmbuffer_t result_buf;
		int ret = 0, merge_status;
		int two_way;

		/*
		 * If 'o' is different type, treat it as null so we do a
		 * two-way merge.
		 */
		two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));

		merge_status = merge_3way(opt, path,
					  two_way ? null_oid() : &o->oid,
					  &a->oid, &b->oid,
					  pathnames, extra_marker_size,
					  &result_buf);

		if ((merge_status < 0) || !result_buf.ptr)
			ret = err(opt, _("Failed to execute internal merge"));

		/* Store the merged content as a new blob */
		if (!ret &&
		    write_object_file(result_buf.ptr, result_buf.size,
				      blob_type, &result->oid))
			ret = err(opt, _("Unable to add %s to database"),
				  path);

		free(result_buf.ptr);
		if (ret)
			return -1;
		/* A non-zero merge_status makes the result unclean */
		clean &= (merge_status == 0);
		path_msg(opt, path, 1, _("Auto-merging %s"), path);
	} else if (S_ISGITLINK(a->mode)) {
		int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
		clean = merge_submodule(opt, pathnames[0],
					two_way ? null_oid() : &o->oid,
					&a->oid, &b->oid, &result->oid);
		/* Unclean two-way merge at non-zero call_depth: use o */
		if (opt->priv->call_depth && two_way && !clean) {
			result->mode = o->mode;
			oidcpy(&result->oid, &o->oid);
		}
	} else if (S_ISLNK(a->mode)) {
		if (opt->priv->call_depth) {
			/* At non-zero call_depth, fall back to o's version */
			clean = 0;
			result->mode = o->mode;
			oidcpy(&result->oid, &o->oid);
		} else {
			/* Symlinks are never content-merged; pick one side */
			switch (opt->recursive_variant) {
			case MERGE_VARIANT_NORMAL:
				clean = 0;
				oidcpy(&result->oid, &a->oid);
				break;
			case MERGE_VARIANT_OURS:
				oidcpy(&result->oid, &a->oid);
				break;
			case MERGE_VARIANT_THEIRS:
				oidcpy(&result->oid, &b->oid);
				break;
			}
		}
	} else
		BUG("unsupported object type in the tree: %06o for %s",
		    a->mode, path);

	return clean;
}

/*** Function Grouping: functions related to detect_and_process_renames(), ***
 *** which are split into directory and regular rename detection sections. ***/

/*** Function Grouping: functions related to directory rename detection ***/

/*
 * NOTE(review): the users of this struct are outside this section.
 * Presumably source_files lists the source paths that collide on one
 * target path and reported_already records whether that collision has
 * already been reported -- confirm against the code that fills it in.
 */
struct collision_info {
	struct string_list source_files;
	unsigned reported_already:1;
};

/*
 * Return a new string that replaces the beginning portion (which matches
 * rename_info->key), with rename_info->util.new_dir.  In perl-speak:
 *   new_path_name = (old_path =~ s/rename_info->key/rename_info->value/);
 * NOTE:
 *   Caller must ensure that old_path starts with rename_info->key + '/'.
*/ static char *apply_dir_rename(struct strmap_entry *rename_info, const char *old_path) { struct strbuf new_path = STRBUF_INIT; const char *old_dir = rename_info->key; const char *new_dir = rename_info->value; int oldlen, newlen, new_dir_len; oldlen = strlen(old_dir); if (*new_dir == '\0') /* * If someone renamed/merged a subdirectory into the root * directory (e.g. 'some/subdir' -> ''), then we want to * avoid returning * '' + '/filename' * as the rename; we need to make old_path + oldlen advance * past the '/' character. */ oldlen++; new_dir_len = strlen(new_dir); newlen = new_dir_len + (strlen(old_path) - oldlen) + 1; strbuf_grow(&new_path, newlen); strbuf_add(&new_path, new_dir, new_dir_len); strbuf_addstr(&new_path, &old_path[oldlen]); return strbuf_detach(&new_path, NULL); } static int path_in_way(struct strmap *paths, const char *path, unsigned side_mask) { struct merged_info *mi = strmap_get(paths, path); struct conflict_info *ci; if (!mi) return 0; INITIALIZE_CI(ci, mi); return mi->clean || (side_mask & (ci->filemask | ci->dirmask)); } /* * See if there is a directory rename for path, and if there are any file * level conflicts on the given side for the renamed location. If there is * a rename and there are no conflicts, return the new name. Otherwise, * return NULL. */ static char *handle_path_level_conflicts(struct merge_options *opt, const char *path, unsigned side_index, struct strmap_entry *rename_info, struct strmap *collisions) { char *new_path = NULL; struct collision_