summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2018-04-10 08:25:43 +0900
committerLibravatar Junio C Hamano <gitster@pobox.com>2018-04-10 08:25:43 +0900
commite4bb62fa1eeee689744b413e29a50b4d1dae6886 (patch)
tree674f7d3cc3c82665a296216b952524b215302028
parentGit 2.17 (diff)
parentmerge-recursive: ensure we write updates for directory-renamed file (diff)
downloadtgif-e4bb62fa1eeee689744b413e29a50b4d1dae6886.tar.xz
Merge branch 'en/rename-directory-detection'
Rename detection logic in "diff" family that is used in "merge" has learned to guess when all of x/a, x/b and x/c have moved to z/a, z/b and z/c, it is likely that x/d added in the meantime would also want to move to z/d by taking the hint that the entire directory 'x' moved to 'z'. A bug causing dirty files involved in a rename to be overwritten during merge has also been fixed as part of this work. * en/rename-directory-detection: (29 commits) merge-recursive: ensure we write updates for directory-renamed file merge-recursive: avoid spurious rename/rename conflict from dir renames directory rename detection: new testcases showcasing a pair of bugs merge-recursive: fix remaining directory rename + dirty overwrite cases merge-recursive: fix overwriting dirty files involved in renames merge-recursive: avoid clobbering untracked files with directory renames merge-recursive: apply necessary modifications for directory renames merge-recursive: when comparing files, don't include trees merge-recursive: check for file level conflicts then get new name merge-recursive: add computation of collisions due to dir rename & merging merge-recursive: check for directory level conflicts merge-recursive: add get_directory_renames() merge-recursive: make a helper function for cleanup for handle_renames merge-recursive: split out code for determining diff_filepairs merge-recursive: make !o->detect_rename codepath more obvious merge-recursive: fix leaks of allocated renames and diff_filepairs merge-recursive: introduce new functions to handle rename logic merge-recursive: move the get_renames() function directory rename detection: tests for handling overwriting dirty files directory rename detection: tests for handling overwriting untracked files ...
-rw-r--r--merge-recursive.c1243
-rw-r--r--merge-recursive.h27
-rw-r--r--strbuf.c16
-rw-r--r--strbuf.h16
-rwxr-xr-xt/t3501-revert-cherry-pick.sh2
-rwxr-xr-xt/t6043-merge-rename-directories.sh3998
-rwxr-xr-xt/t7607-merge-overwrite.sh2
-rw-r--r--unpack-trees.c4
-rw-r--r--unpack-trees.h4
9 files changed, 5197 insertions, 115 deletions
diff --git a/merge-recursive.c b/merge-recursive.c
index 869092f7b9..55d1e779a0 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -49,6 +49,67 @@ static unsigned int path_hash(const char *path)
return ignore_case ? strihash(path) : strhash(path);
}
+/*
+ * Find the directory-rename record for 'dir' in 'hashmap'.
+ *
+ * Returns NULL when 'dir' is NULL; otherwise returns whatever
+ * hashmap_get() yields for a key hashed with strhash(dir).
+ */
+static struct dir_rename_entry *dir_rename_find_entry(struct hashmap *hashmap,
+						      char *dir)
+{
+	struct dir_rename_entry lookup;
+
+	if (!dir)
+		return NULL;
+
+	hashmap_entry_init(&lookup, strhash(dir));
+	lookup.dir = dir;
+
+	return hashmap_get(hashmap, &lookup, NULL);
+}
+
+/*
+ * hashmap comparison callback for dir_rename_entry.
+ *
+ * Compares the 'dir' strings of the two entries with strcmp() and returns
+ * its result, so 0 means "same directory".  The cmp-data and keydata
+ * arguments are ignored.
+ */
+static int dir_rename_cmp(const void *unused_cmp_data,
+			  const void *entry,
+			  const void *entry_or_key,
+			  const void *unused_keydata)
+{
+	const struct dir_rename_entry *lhs = entry;
+	const struct dir_rename_entry *rhs = entry_or_key;
+	const char *lhs_dir = lhs->dir;
+	const char *rhs_dir = rhs->dir;
+
+	return strcmp(lhs_dir, rhs_dir);
+}
+
+/*
+ * Initialize 'map' as a hashmap whose entries are compared with
+ * dir_rename_cmp(); no comparison data and no initial size are supplied.
+ */
+static void dir_rename_init(struct hashmap *map)
+{
+	hashmap_init(map, dir_rename_cmp, NULL, 0);
+}
+
+/*
+ * Set up 'entry' so it can be inserted into a directory-rename hashmap.
+ *
+ * The entry is hashed by 'directory' and stores that pointer itself (not
+ * a copy) in entry->dir.  It starts with an empty new_dir, an empty
+ * possible_new_dirs list and the non_unique_new_dir flag cleared.
+ */
+static void dir_rename_entry_init(struct dir_rename_entry *entry,
+				  char *directory)
+{
+	hashmap_entry_init(entry, strhash(directory));
+
+	entry->dir = directory;
+	string_list_init(&entry->possible_new_dirs, 0);
+	strbuf_init(&entry->new_dir, 0);
+	entry->non_unique_new_dir = 0;
+}
+
+/*
+ * Look up the collision record for 'target_file' in 'hashmap'.
+ *
+ * Builds a temporary key hashed with strhash(target_file) and returns
+ * the result of hashmap_get() for it.
+ */
+static struct collision_entry *collision_find_entry(struct hashmap *hashmap,
+						    char *target_file)
+{
+	struct collision_entry lookup;
+
+	hashmap_entry_init(&lookup, strhash(target_file));
+	lookup.target_file = target_file;
+
+	return hashmap_get(hashmap, &lookup, NULL);
+}
+
+/*
+ * hashmap comparison callback for collision_entry.
+ *
+ * Returns the strcmp() result of the two entries' target_file strings,
+ * so 0 means both entries name the same target path.  The cmp-data and
+ * keydata arguments are ignored.
+ *
+ * The parameters are plain 'const void *', exactly like dir_rename_cmp(),
+ * so that the function has the type it is invoked through by the hashmap
+ * code rather than relying on a cast between incompatible function
+ * pointer types.
+ */
+static int collision_cmp(const void *unused_cmp_data,
+			 const void *entry,
+			 const void *entry_or_key,
+			 const void *unused_keydata)
+{
+	const struct collision_entry *e1 = entry;
+	const struct collision_entry *e2 = entry_or_key;
+
+	return strcmp(e1->target_file, e2->target_file);
+}
+
+/*
+ * Initialize 'map' as a hashmap whose entries are compared with
+ * collision_cmp() (cast to the generic hashmap_cmp_fn type); no
+ * comparison data and no initial size are supplied.
+ */
+static void collision_init(struct hashmap *map)
+{
+	hashmap_init(map, (hashmap_cmp_fn) collision_cmp, NULL, 0);
+}
+
static void flush_output(struct merge_options *o)
{
if (o->buffer_output < 2 && o->obuf.len) {
@@ -119,6 +180,7 @@ static int oid_eq(const struct object_id *a, const struct object_id *b)
enum rename_type {
RENAME_NORMAL = 0,
+ RENAME_DIR,
RENAME_DELETE,
RENAME_ONE_FILE_TO_ONE,
RENAME_ONE_FILE_TO_TWO,
@@ -275,32 +337,37 @@ static void init_tree_desc_from_tree(struct tree_desc *desc, struct tree *tree)
init_tree_desc(desc, tree->buffer, tree->size);
}
-static int git_merge_trees(int index_only,
+static int git_merge_trees(struct merge_options *o,
struct tree *common,
struct tree *head,
struct tree *merge)
{
int rc;
struct tree_desc t[3];
- struct unpack_trees_options opts;
- memset(&opts, 0, sizeof(opts));
- if (index_only)
- opts.index_only = 1;
+ memset(&o->unpack_opts, 0, sizeof(o->unpack_opts));
+ if (o->call_depth)
+ o->unpack_opts.index_only = 1;
else
- opts.update = 1;
- opts.merge = 1;
- opts.head_idx = 2;
- opts.fn = threeway_merge;
- opts.src_index = &the_index;
- opts.dst_index = &the_index;
- setup_unpack_trees_porcelain(&opts, "merge");
+ o->unpack_opts.update = 1;
+ o->unpack_opts.merge = 1;
+ o->unpack_opts.head_idx = 2;
+ o->unpack_opts.fn = threeway_merge;
+ o->unpack_opts.src_index = &the_index;
+ o->unpack_opts.dst_index = &the_index;
+ setup_unpack_trees_porcelain(&o->unpack_opts, "merge");
init_tree_desc_from_tree(t+0, common);
init_tree_desc_from_tree(t+1, head);
init_tree_desc_from_tree(t+2, merge);
- rc = unpack_trees(3, t, &opts);
+ rc = unpack_trees(3, t, &o->unpack_opts);
+ /*
+ * unpack_trees NULLifies src_index, but it's used in verify_uptodate,
+ * so set to the new index which will usually have modification
+ * timestamp info copied over.
+ */
+ o->unpack_opts.src_index = &the_index;
cache_tree_free(&active_cache_tree);
return rc;
}
@@ -360,6 +427,21 @@ static void get_files_dirs(struct merge_options *o, struct tree *tree)
read_tree_recursive(tree, "", 0, 0, &match_all, save_files_dirs, o);
}
+/*
+ * Like get_tree_entry(), except that a directory result is discarded.
+ *
+ * When the mode stored in '*mode_o' after the lookup is a directory,
+ * 'hashy' is overwritten with null_sha1 and '*mode_o' is reset to 0.
+ * The return value of get_tree_entry() is passed through unchanged.
+ */
+static int get_tree_entry_if_blob(const unsigned char *tree,
+				  const char *path,
+				  unsigned char *hashy,
+				  unsigned int *mode_o)
+{
+	int status = get_tree_entry(tree, path, hashy, mode_o);
+
+	if (!S_ISDIR(*mode_o))
+		return status;
+
+	hashcpy(hashy, null_sha1);
+	*mode_o = 0;
+	return status;
+}
+
/*
* Returns an index_entry instance which doesn't have to correspond to
* a real cache entry in Git's index.
@@ -370,12 +452,12 @@ static struct stage_data *insert_stage_data(const char *path,
{
struct string_list_item *item;
struct stage_data *e = xcalloc(1, sizeof(struct stage_data));
- get_tree_entry(o->object.oid.hash, path,
- e->stages[1].oid.hash, &e->stages[1].mode);
- get_tree_entry(a->object.oid.hash, path,
- e->stages[2].oid.hash, &e->stages[2].mode);
- get_tree_entry(b->object.oid.hash, path,
- e->stages[3].oid.hash, &e->stages[3].mode);
+ get_tree_entry_if_blob(o->object.oid.hash, path,
+ e->stages[1].oid.hash, &e->stages[1].mode);
+ get_tree_entry_if_blob(a->object.oid.hash, path,
+ e->stages[2].oid.hash, &e->stages[2].mode);
+ get_tree_entry_if_blob(b->object.oid.hash, path,
+ e->stages[3].oid.hash, &e->stages[3].mode);
item = string_list_insert(entries, path);
item->util = e;
return e;
@@ -534,78 +616,10 @@ struct rename {
*/
struct stage_data *src_entry;
struct stage_data *dst_entry;
+ unsigned add_turned_into_rename:1;
unsigned processed:1;
};
-/*
- * Get information of all renames which occurred between 'o_tree' and
- * 'tree'. We need the three trees in the merge ('o_tree', 'a_tree' and
- * 'b_tree') to be able to associate the correct cache entries with
- * the rename information. 'tree' is always equal to either a_tree or b_tree.
- */
-static struct string_list *get_renames(struct merge_options *o,
- struct tree *tree,
- struct tree *o_tree,
- struct tree *a_tree,
- struct tree *b_tree,
- struct string_list *entries)
-{
- int i;
- struct string_list *renames;
- struct diff_options opts;
-
- renames = xcalloc(1, sizeof(struct string_list));
- if (!o->detect_rename)
- return renames;
-
- diff_setup(&opts);
- opts.flags.recursive = 1;
- opts.flags.rename_empty = 0;
- opts.detect_rename = DIFF_DETECT_RENAME;
- opts.rename_limit = o->merge_rename_limit >= 0 ? o->merge_rename_limit :
- o->diff_rename_limit >= 0 ? o->diff_rename_limit :
- 1000;
- opts.rename_score = o->rename_score;
- opts.show_rename_progress = o->show_rename_progress;
- opts.output_format = DIFF_FORMAT_NO_OUTPUT;
- diff_setup_done(&opts);
- diff_tree_oid(&o_tree->object.oid, &tree->object.oid, "", &opts);
- diffcore_std(&opts);
- if (opts.needed_rename_limit > o->needed_rename_limit)
- o->needed_rename_limit = opts.needed_rename_limit;
- for (i = 0; i < diff_queued_diff.nr; ++i) {
- struct string_list_item *item;
- struct rename *re;
- struct diff_filepair *pair = diff_queued_diff.queue[i];
- if (pair->status != 'R') {
- diff_free_filepair(pair);
- continue;
- }
- re = xmalloc(sizeof(*re));
- re->processed = 0;
- re->pair = pair;
- item = string_list_lookup(entries, re->pair->one->path);
- if (!item)
- re->src_entry = insert_stage_data(re->pair->one->path,
- o_tree, a_tree, b_tree, entries);
- else
- re->src_entry = item->util;
-
- item = string_list_lookup(entries, re->pair->two->path);
- if (!item)
- re->dst_entry = insert_stage_data(re->pair->two->path,
- o_tree, a_tree, b_tree, entries);
- else
- re->dst_entry = item->util;
- item = string_list_insert(renames, pair->one->path);
- item->util = re;
- }
- opts.output_format = DIFF_FORMAT_NO_OUTPUT;
- diff_queued_diff.nr = 0;
- diff_flush(&opts);
- return renames;
-}
-
static int update_stages(struct merge_options *opt, const char *path,
const struct diff_filespec *o,
const struct diff_filespec *a,
@@ -637,6 +651,27 @@ static int update_stages(struct merge_options *opt, const char *path,
return 0;
}
+/*
+ * Record the three stages held in 'stage_data' for 'path' by handing
+ * them to update_stages().
+ *
+ * stages[1], stages[2] and stages[3] become the second, third and fourth
+ * filespec arguments respectively; a stage whose object id is the null
+ * oid is passed as NULL instead.  Returns what update_stages() returns.
+ */
+static int update_stages_for_stage_data(struct merge_options *opt,
+					const char *path,
+					const struct stage_data *stage_data)
+{
+	struct diff_filespec base, side_a, side_b;
+
+	oidcpy(&base.oid, &stage_data->stages[1].oid);
+	base.mode = stage_data->stages[1].mode;
+
+	oidcpy(&side_a.oid, &stage_data->stages[2].oid);
+	side_a.mode = stage_data->stages[2].mode;
+
+	oidcpy(&side_b.oid, &stage_data->stages[3].oid);
+	side_b.mode = stage_data->stages[3].mode;
+
+	return update_stages(opt, path,
+			     is_null_oid(&base.oid) ? NULL : &base,
+			     is_null_oid(&side_a.oid) ? NULL : &side_a,
+			     is_null_oid(&side_b.oid) ? NULL : &side_b);
+}
+
static void update_entry(struct stage_data *entry,
struct diff_filespec *o,
struct diff_filespec *a,
@@ -765,6 +800,20 @@ static int would_lose_untracked(const char *path)
return !was_tracked(path) && file_exists(path);
}
+/*
+ * Report whether the tracked file at 'path' is dirty.
+ *
+ * Returns 0 when o->call_depth is non-zero or when 'path' was not
+ * tracked.  Otherwise returns 1 only if the cache entry for 'path'
+ * records a positive mtime and verify_uptodate() returns non-zero for
+ * it; in every other case the result is 0.
+ */
+static int was_dirty(struct merge_options *o, const char *path)
+{
+	struct cache_entry *ce;
+
+	if (o->call_depth || !was_tracked(path))
+		return 0;
+
+	ce = cache_file_exists(path, strlen(path), ignore_case);
+	if (!(ce->ce_stat_data.sd_mtime.sec > 0))
+		return 0;
+
+	return verify_uptodate(ce, &o->unpack_opts) != 0;
+}
+
static int make_room_for_path(struct merge_options *o, const char *path)
{
int status, i;
@@ -1114,6 +1163,38 @@ static int merge_file_one(struct merge_options *o,
return merge_file_1(o, &one, &a, &b, branch1, branch2, mfi);
}
+/*
+ * Write out the destination of a rename (pair->two).
+ *
+ * Outside of recursive (call_depth) merges, if an untracked file sits at
+ * the destination path, the content is written to a unique alternate
+ * path in the working tree and the index gets the destination only at
+ * the stage belonging to 'rename_branch'.  Otherwise the destination is
+ * updated both in the index and in the working tree.
+ *
+ * 'other_branch' is not used by this function.
+ *
+ * Returns -1 if update_file() fails, otherwise 0 or the result of
+ * update_stages().
+ */
+static int conflict_rename_dir(struct merge_options *o,
+			       struct diff_filepair *pair,
+			       const char *rename_branch,
+			       const char *other_branch)
+{
+	const struct diff_filespec *dest = pair->two;
+
+	if (!o->call_depth && would_lose_untracked(dest->path)) {
+		char *alt_path = unique_path(o, dest->path, rename_branch);
+		int failed;
+
+		output(o, 1, _("Error: Refusing to lose untracked file at %s; "
+			       "writing to %s instead."),
+		       dest->path, alt_path);
+		/*
+		 * Write the file in worktree at alt_path, but not in the
+		 * index.  Instead, write to dest->path for the index but
+		 * only at the higher appropriate stage.
+		 */
+		failed = update_file(o, 0, &dest->oid, dest->mode, alt_path);
+		/* alt_path is ours to release on success and failure alike */
+		free(alt_path);
+		if (failed)
+			return -1;
+		return update_stages(o, dest->path, NULL,
+				     rename_branch == o->branch1 ? dest : NULL,
+				     rename_branch == o->branch1 ? NULL : dest);
+	}
+
+	/* Update dest->path both in index and in worktree */
+	if (update_file(o, 1, &dest->oid, dest->mode, dest->path))
+		return -1;
+	return 0;
+}
+
static int handle_change_delete(struct merge_options *o,
const char *path, const char *old_path,
const struct object_id *o_oid, int o_mode,
@@ -1127,7 +1208,8 @@ static int handle_change_delete(struct merge_options *o,
const char *update_path = path;
int ret = 0;
- if (dir_in_way(path, !o->call_depth, 0)) {
+ if (dir_in_way(path, !o->call_depth, 0) ||
+ (!o->call_depth && would_lose_untracked(path))) {
update_path = alt_path = unique_path(o, path, change_branch);
}
@@ -1242,17 +1324,34 @@ static int handle_file(struct merge_options *o,
add = filespec_from_entry(&other, dst_entry, stage ^ 1);
if (add) {
+ int ren_src_was_dirty = was_dirty(o, rename->path);
char *add_name = unique_path(o, rename->path, other_branch);
if (update_file(o, 0, &add->oid, add->mode, add_name))
return -1;
- remove_file(o, 0, rename->path, 0);
+ if (ren_src_was_dirty) {
+ output(o, 1, _("Refusing to lose dirty file at %s"),
+ rename->path);
+ }
+ /*
+ * Because the double negatives somehow keep confusing me...
+ * 1) update_wd iff !ren_src_was_dirty.
+ * 2) no_wd iff !update_wd
+ * 3) so, no_wd == !!ren_src_was_dirty == ren_src_was_dirty
+ */
+ remove_file(o, 0, rename->path, ren_src_was_dirty);
dst_name = unique_path(o, rename->path, cur_branch);
} else {
if (dir_in_way(rename->path, !o->call_depth, 0)) {
dst_name = unique_path(o, rename->path, cur_branch);
output(o, 1, _("%s is a directory in %s adding as %s instead"),
rename->path, other_branch, dst_name);
+ } else if (!o->call_depth &&
+ would_lose_untracked(rename->path)) {
+ dst_name = unique_path(o, rename->path, cur_branch);
+ output(o, 1, _("Refusing to lose untracked file at %s; "
+ "adding as %s instead"),
+ rename->path, dst_name);
}
}
if ((ret = update_file(o, 0, &rename->oid, rename->mode, dst_name)))
@@ -1378,11 +1477,43 @@ static int conflict_rename_rename_2to1(struct merge_options *o,
char *new_path2 = unique_path(o, path, ci->branch2);
output(o, 1, _("Renaming %s to %s and %s to %s instead"),
a->path, new_path1, b->path, new_path2);
- remove_file(o, 0, path, 0);
+ if (was_dirty(o, path))
+ output(o, 1, _("Refusing to lose dirty file at %s"),
+ path);
+ else if (would_lose_untracked(path))
+ /*
+ * Only way we get here is if both renames were from
+ * a directory rename AND user had an untracked file
+ * at the location where both files end up after the
+ * two directory renames. See testcase 10d of t6043.
+ */
+ output(o, 1, _("Refusing to lose untracked file at "
+ "%s, even though it's in the way."),
+ path);
+ else
+ remove_file(o, 0, path, 0);
ret = update_file(o, 0, &mfi_c1.oid, mfi_c1.mode, new_path1);
if (!ret)
ret = update_file(o, 0, &mfi_c2.oid, mfi_c2.mode,
new_path2);
+ /*
+ * unpack_trees() actually populates the index for us for
+ * "normal" rename/rename(2to1) situations so that the
+ * correct entries are at the higher stages, which would
+ * make the call below to update_stages_for_stage_data
+ * unnecessary. However, if either of the renames came
+ * from a directory rename, then unpack_trees() will not
+ * have gotten the right data loaded into the index, so we
+ * need to do so now. (While it'd be tempting to move this
+ * call to update_stages_for_stage_data() to
+ * apply_directory_rename_modifications(), that would break
+ * our intermediate calls to would_lose_untracked() since
+ * those rely on the current in-memory index. See also the
+ * big "NOTE" in update_stages()).
+ */
+ if (update_stages_for_stage_data(o, path, ci->dst_entry1))
+ ret = -1;
+
free(new_path2);
free(new_path1);
}
@@ -1390,6 +1521,754 @@ static int conflict_rename_rename_2to1(struct merge_options *o,
return ret;
}
+/*
+ * Get the diff_filepairs changed between o_tree and tree.
+ *
+ * Runs a recursive tree diff with rename detection enabled and returns
+ * an xmalloc'ed diff_queue_struct that takes over the contents of the
+ * global diff_queued_diff; the global queue is emptied before
+ * diff_flush() is called.  o->needed_rename_limit is raised when this
+ * diff reports a larger value than the one recorded so far.
+ */
+static struct diff_queue_struct *get_diffpairs(struct merge_options *o,
+					       struct tree *o_tree,
+					       struct tree *tree)
+{
+	struct diff_options opts;
+	struct diff_queue_struct *pairs;
+
+	diff_setup(&opts);
+	opts.flags.recursive = 1;
+	opts.flags.rename_empty = 0;
+	opts.detect_rename = DIFF_DETECT_RENAME;
+
+	/* merge-specific limit wins, then the diff limit, then 1000 */
+	if (o->merge_rename_limit >= 0)
+		opts.rename_limit = o->merge_rename_limit;
+	else if (o->diff_rename_limit >= 0)
+		opts.rename_limit = o->diff_rename_limit;
+	else
+		opts.rename_limit = 1000;
+
+	opts.rename_score = o->rename_score;
+	opts.show_rename_progress = o->show_rename_progress;
+	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+	diff_setup_done(&opts);
+
+	diff_tree_oid(&o_tree->object.oid, &tree->object.oid, "", &opts);
+	diffcore_std(&opts);
+	if (o->needed_rename_limit < opts.needed_rename_limit)
+		o->needed_rename_limit = opts.needed_rename_limit;
+
+	/* Hand the queued pairs over to the caller */
+	pairs = xmalloc(sizeof(*pairs));
+	*pairs = diff_queued_diff;
+
+	opts.output_format = DIFF_FORMAT_NO_OUTPUT;
+	diff_queued_diff.nr = 0;
+	diff_queued_diff.queue = NULL;
+	diff_flush(&opts);
+
+	return pairs;
+}
+
+/*
+ * Return 1 if get_tree_entry() returns 0 for 'path' in 'tree', and 0
+ * otherwise.  The hash and mode it reports are discarded.
+ */
+static int tree_has_path(struct tree *tree, const char *path)
+{
+	unsigned char ignored_hash[GIT_MAX_RAWSZ];
+	unsigned int ignored_mode;
+	int status;
+
+	status = get_tree_entry(tree->object.oid.hash, path,
+				ignored_hash, &ignored_mode);
+	return status == 0;
+}
+
+/*
+ * Build a new string in which the leading entry->dir portion of
+ * old_path is replaced by entry->new_dir.  In perl-speak:
+ *   new_path_name = (old_path =~ s/entry->dir/entry->new_dir/);
+ *
+ * Returns NULL when entry->non_unique_new_dir is set; otherwise returns
+ * a string detached from a strbuf.
+ *
+ * NOTE:
+ *   Caller must ensure that old_path starts with entry->dir + '/'.
+ */
+static char *apply_dir_rename(struct dir_rename_entry *entry,
+			      const char *old_path)
+{
+	struct strbuf renamed = STRBUF_INIT;
+	const char *remainder;
+	int oldlen, newlen;
+
+	if (entry->non_unique_new_dir)
+		return NULL;
+
+	oldlen = strlen(entry->dir);
+	newlen = entry->new_dir.len + (strlen(old_path) - oldlen) + 1;
+	/* everything after the old directory name, starting at its '/' */
+	remainder = &old_path[oldlen];
+
+	strbuf_grow(&renamed, newlen);
+	strbuf_addbuf(&renamed, &entry->new_dir);
+	strbuf_addstr(&renamed, remainder);
+
+	return strbuf_detach(&renamed, NULL);
+}
+
+/*
+ * Work out which directory portion differs between the source and the
+ * destination path of a rename.
+ *
+ * On return *old_dir and *new_dir are either both NULL (one of the paths
+ * has no '/', or the directories turned out to be identical) or both
+ * point to xstrndup()'ed strings holding the old and the new directory.
+ */
+static void get_renamed_dir_portion(const char *old_path, const char *new_path,
+				    char **old_dir, char **new_dir)
+{
+	char *old_cursor, *new_cursor;
+	int old_len, new_len;
+
+	*old_dir = NULL;
+	*new_dir = NULL;
+
+	/*
+	 * For
+	 *    "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
+	 * the "e/foo.c" part is the same, we just want to know that
+	 *    "a/b/c/d" was renamed to "a/b/some/thing/else"
+	 * so, for this example, this function returns "a/b/c/d" in
+	 * *old_dir and "a/b/some/thing/else" in *new_dir.
+	 *
+	 * Also, if the basename of the file changed, we don't care.  We
+	 * want to know which portion of the directory, if any, changed.
+	 */
+	old_cursor = strrchr(old_path, '/');
+	new_cursor = strrchr(new_path, '/');
+
+	if (!old_cursor || !new_cursor)
+		return;
+
+	/*
+	 * Step backwards through both paths in lockstep for as long as the
+	 * characters agree and neither cursor has reached the start of its
+	 * path.
+	 */
+	while (*--new_cursor == *--old_cursor &&
+	       old_cursor != old_path &&
+	       new_cursor != new_path)
+		; /* Do nothing; all in the while loop */
+
+	/*
+	 * We've found the first non-matching character in the directory
+	 * paths.  That means the current directory we were comparing
+	 * represents the rename.  Move both cursors forward again to the
+	 * '/' that terminates the full directory name.
+	 */
+	if (*old_cursor == '/')
+		old_cursor++;
+	if (*old_cursor != '/')
+		new_cursor++;
+	old_cursor = strchr(old_cursor, '/');
+	new_cursor = strchr(new_cursor, '/');
+
+	old_len = old_cursor - old_path;
+	new_len = new_cursor - new_path;
+
+	/*
+	 * It may have been the case that old_path and new_path were the same
+	 * directory all along.  Don't claim a rename if they're the same.
+	 */
+	if (old_len == new_len && !strncmp(old_path, new_path, old_len))
+		return;
+
+	*old_dir = xstrndup(old_path, old_len);
+	*new_dir = xstrndup(new_path, new_len);
+}
+
+/*
+ * Remove from 'dir_renames' every entry stored as the util pointer of an
+ * item in 'items_to_remove', then clear that list (passing 0 as the
+ * second argument of string_list_clear()).
+ */
+static void remove_hashmap_entries(struct hashmap *dir_renames,
+				   struct string_list *items_to_remove)
+{
+	int idx;
+
+	for (idx = 0; idx < items_to_remove->nr; idx++) {
+		struct dir_rename_entry *doomed;
+
+		doomed = items_to_remove->items[idx].util;
+		hashmap_remove(dir_renames, doomed, NULL);
+	}
+
+	string_list_clear(items_to_remove, 0);
+}
+
+/*
+ * See if there is a directory rename for path, and if there are any file
+ * level conflicts for the renamed location.  If there is a rename and
+ * there are no conflicts, return the new name (a string the caller then
+ * owns).  Otherwise, report the conflict via output() where applicable
+ * and return NULL.
+ */
+static char *handle_path_level_conflicts(struct merge_options *o,
+					 const char *path,
+					 struct dir_rename_entry *entry,
+					 struct hashmap *collisions,
+					 struct tree *tree)
+{
+	struct strbuf collision_paths = STRBUF_INIT;
+	struct collision_entry *collision_ent;
+	char *new_path;
+	int conflicted = 0;
+
+	/*
+	 * entry has the mapping of old directory name to new directory name
+	 * that we want to apply to path.
+	 */
+	new_path = apply_dir_rename(entry, path);
+	if (!new_path) {
+		/* This should only happen when entry->non_unique_new_dir set */
+		if (!entry->non_unique_new_dir)
+			BUG("entry->non_unqiue_dir not set and !new_path");
+		output(o, 1, _("CONFLICT (directory rename split): "
+			       "Unclear where to place %s because directory "
+			       "%s was renamed to multiple other directories, "
+			       "with no destination getting a majority of the "
+			       "files."),
+		       path, entry->dir);
+		return NULL;
+	}
+
+	/*
+	 * The caller needs to have ensured that it has pre-populated
+	 * collisions with all paths that map to new_path.  Do a quick check
+	 * to ensure that's the case.
+	 */
+	collision_ent = collision_find_entry(collisions, new_path);
+	if (!collision_ent)
+		BUG("collision_ent is NULL");
+
+	/*
+	 * Check for one-sided add/add/.../add conflicts, i.e.
+	 * where implicit renames from the other side doing
+	 * directory rename(s) can affect this side of history
+	 * to put multiple paths into the same location.  Warn
+	 * and bail on directory renames for such paths.
+	 */
+	if (collision_ent->reported_already) {
+		conflicted = 1;
+	} else if (tree_has_path(tree, new_path)) {
+		collision_ent->reported_already = 1;
+		strbuf_add_separated_string_list(&collision_paths, ", ",
+						 &collision_ent->source_files);
+		output(o, 1, _("CONFLICT (implicit dir rename): Existing "
+			       "file/dir at %s in the way of implicit "
+			       "directory rename(s) putting the following "
+			       "path(s) there: %s."),
+		       new_path, collision_paths.buf);
+		conflicted = 1;
+	} else if (collision_ent->source_files.nr > 1) {
+		collision_ent->reported_already = 1;
+		strbuf_add_separated_string_list(&collision_paths, ", ",
+						 &collision_ent->source_files);
+		output(o, 1, _("CONFLICT (implicit dir rename): Cannot map "
+			       "more than one path to %s; implicit directory "
+			       "renames tried to put these paths there: %s"),
+		       new_path, collision_paths.buf);
+		conflicted = 1;
+	}
+
+	/* Free memory we no longer need */
+	strbuf_release(&collision_paths);
+	if (conflicted) {
+		free(new_path);
+		new_path = NULL;
+	}
+
+	return new_path;
+}
+
+/*
+ * There are a couple things we want to do at the directory level:
+ *   1. Check for both sides renaming to the same thing, in order to avoid
+ *      implicit renaming of files that should be left in place.  (See
+ *      testcase 6b in t6043 for details.)
+ *   2. Prune directory renames if there are still files left in the
+ *      original directory.  These represent a partial directory rename,
+ *      i.e. a rename where only some of the files within the directory
+ *      were renamed elsewhere.  (Technically, this could be done earlier
+ *      in get_directory_renames(), except that would prevent us from
+ *      doing the previous check and thus failing testcase 6b.)
+ *   3. Check for rename/rename(1to2) conflicts (at the directory level).
+ *      In the future, we could potentially record this info as well and
+ *      omit reporting rename/rename(1to2) conflicts for each path within
+ *      the affected directories, thus cleaning up the merge output.
+ *   NOTE: We do NOT check for rename/rename(2to1) conflicts at the
+ *         directory level, because merging directories is fine.  If it
+ *         causes conflicts for files within those merged directories, then
+ *         that should be detected at the individual path level.
+ *
+ * Entries that fall under any of the three cases are removed from the
+ * corresponding hashmap, and the new_dir strbuf of every removed entry
+ * is released first.
+ */
+static void handle_directory_level_conflicts(struct merge_options *o,
+					     struct hashmap *dir_re_head,
+					     struct tree *head,
+					     struct hashmap *dir_re_merge,
+					     struct tree *merge)
+{
+	struct hashmap_iter iter;
+	struct dir_rename_entry *head_ent;
+	struct dir_rename_entry *merge_ent;
+
+	struct string_list remove_from_head = STRING_LIST_INIT_NODUP;
+	struct string_list remove_from_merge = STRING_LIST_INIT_NODUP;
+
+	hashmap_iter_init(dir_re_head, &iter);
+	while ((head_ent = hashmap_iter_next(&iter))) {
+		merge_ent = dir_rename_find_entry(dir_re_merge, head_ent->dir);
+		if (merge_ent &&
+		    !head_ent->non_unique_new_dir &&
+		    !merge_ent->non_unique_new_dir &&
+		    !strbuf_cmp(&head_ent->new_dir, &merge_ent->new_dir)) {
+			/* 1. Renamed identically; remove it from both sides */
+			string_list_append(&remove_from_head,
+					   head_ent->dir)->util = head_ent;
+			strbuf_release(&head_ent->new_dir);
+			string_list_append(&remove_from_merge,
+					   merge_ent->dir)->util = merge_ent;
+			strbuf_release(&merge_ent->new_dir);
+		} else if (tree_has_path(head, head_ent->dir)) {
+			/* 2. This wasn't a directory rename after all */
+			string_list_append(&remove_from_head,
+					   head_ent->dir)->util = head_ent;
+			strbuf_release(&head_ent->new_dir);
+		}
+	}
+
+	/* Removal is deferred so the map is not modified while iterating */
+	remove_hashmap_entries(dir_re_head, &remove_from_head);
+	remove_hashmap_entries(dir_re_merge, &remove_from_merge);
+
+	hashmap_iter_init(dir_re_merge, &iter);
+	while ((merge_ent = hashmap_iter_next(&iter))) {
+		head_ent = dir_rename_find_entry(dir_re_head, merge_ent->dir);
+		if (tree_has_path(merge, merge_ent->dir)) {
+			/* 2. This wasn't a directory rename after all */
+			string_list_append(&remove_from_merge,
+					   merge_ent->dir)->util = merge_ent;
+			/*
+			 * Release new_dir here too, as every other removal
+			 * path does; otherwise its buffer is leaked.
+			 */
+			strbuf_release(&merge_ent->new_dir);
+		} else if (head_ent &&
+			   !head_ent->non_unique_new_dir &&
+			   !merge_ent->non_unique_new_dir) {
+			/* 3. rename/rename(1to2) */
+			/*
+			 * We can assume it's not rename/rename(1to1) because
+			 * that was case (1), already checked above.  So we
+			 * know that head_ent->new_dir and merge_ent->new_dir
+			 * are different strings.
+			 */
+			output(o, 1, _("CONFLICT (rename/rename): "
+				       "Rename directory %s->%s in %s. "
+				       "Rename directory %s->%s in %s"),
+			       head_ent->dir, head_ent->new_dir.buf, o->branch1,
+			       head_ent->dir, merge_ent->new_dir.buf, o->branch2);
+			string_list_append(&remove_from_head,
+					   head_ent->dir)->util = head_ent;
+			strbuf_release(&head_ent->new_dir);
+			string_list_append(&remove_from_merge,
+					   merge_ent->dir)->util = merge_ent;
+			strbuf_release(&merge_ent->new_dir);
+		}
+	}
+
+	remove_hashmap_entries(dir_re_head, &remove_from_head);
+	remove_hashmap_entries(dir_re_merge, &remove_from_merge);
+}
+
+/*
+ * Build and return an xmalloc'ed hashmap of dir_rename_entry records, one
+ * per source directory that had at least one file renamed out of it in
+ * 'pairs'.  For each record either new_dir names the target directory
+ * that received the most files, or non_unique_new_dir is set because no
+ * single target won.
+ *
+ * 'tree' is not used by this function.
+ */
+static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
+					     struct tree *tree)
+{
+	struct hashmap *dir_renames;
+	struct hashmap_iter iter;
+	struct dir_rename_entry *entry;
+	int i;
+
+	/*
+	 * Typically, we think of a directory rename as all files from a
+	 * certain directory being moved to a target directory.  However,
+	 * what if someone first moved two files from the original
+	 * directory in one commit, and then renamed the directory
+	 * somewhere else in a later commit?  At merge time, we just know
+	 * that files from the original directory went to two different
+	 * places, and that the bulk of them ended up in the same place.
+	 * We want each directory rename to represent where the bulk of the
+	 * files from that directory end up; this function exists to find
+	 * where the bulk of the files went.
+	 *
+	 * The first loop below simply iterates through the list of file
+	 * renames, finding out how often each directory rename pair
+	 * possibility occurs.
+	 */
+	dir_renames = xmalloc(sizeof(struct hashmap));
+	dir_rename_init(dir_renames);
+	for (i = 0; i < pairs->nr; ++i) {
+		struct diff_filepair *pair = pairs->queue[i];
+		struct string_list_item *item;
+		char *old_dir, *new_dir;
+		int *count;
+
+		/* File not part of directory rename if it wasn't renamed */
+		if (pair->status != 'R')
+			continue;
+
+		get_renamed_dir_portion(pair->one->path, pair->two->path,
+					&old_dir, &new_dir);
+		if (!old_dir)
+			/* Directory didn't change at all; ignore this one. */
+			continue;
+
+		/* An existing entry keeps its own key; ours is redundant */
+		entry = dir_rename_find_entry(dir_renames, old_dir);
+		if (entry) {
+			free(old_dir);
+		} else {
+			entry = xmalloc(sizeof(struct dir_rename_entry));
+			dir_rename_entry_init(entry, old_dir);
+			hashmap_put(dir_renames, entry);
+		}
+
+		/* Likewise for the candidate target directory */
+		item = string_list_lookup(&entry->possible_new_dirs, new_dir);
+		if (item) {
+			free(new_dir);
+		} else {
+			item = string_list_insert(&entry->possible_new_dirs,
+						  new_dir);
+			item->util = xcalloc(1, sizeof(int));
+		}
+		count = item->util;
+		*count += 1;
+	}
+
+	/*
+	 * For each directory with files moved out of it, we find out which
+	 * target directory received the most files so we can declare it to
+	 * be the "winning" target location for the directory rename.  This
+	 * winner gets recorded in new_dir.  If there is no winner
+	 * (multiple target directories received the same number of files),
+	 * we set non_unique_new_dir.  Once we've determined the winner (or
+	 * that there is no winner), we no longer need possible_new_dirs.
+	 */
+	hashmap_iter_init(dir_renames, &iter);
+	while ((entry = hashmap_iter_next(&iter))) {
+		char *best = NULL;
+		int max = 0;
+		int bad_max = 0;
+
+		for (i = 0; i < entry->possible_new_dirs.nr; i++) {
+			struct string_list_item *candidate =
+				&entry->possible_new_dirs.items[i];
+			int *votes = candidate->util;
+
+			if (*votes == max) {
+				bad_max = max;
+			} else if (*votes > max) {
+				max = *votes;
+				best = candidate->string;
+			}
+		}
+
+		if (bad_max == max) {
+			entry->non_unique_new_dir = 1;
+		} else {
+			assert(entry->new_dir.len == 0);
+			strbuf_addstr(&entry->new_dir, best);
+		}
+
+		/*
+		 * The relevant directory sub-portion of the original full
+		 * filepaths were xstrndup'ed before inserting into
+		 * possible_new_dirs, and instead of manually iterating the
+		 * list and free'ing each, just lie and tell
+		 * possible_new_dirs that it did the strdup'ing so that it
+		 * will free them for us.
+		 */
+		entry->possible_new_dirs.strdup_strings = 1;
+		string_list_clear(&entry->possible_new_dirs, 1);
+	}
+
+	return dir_renames;
+}
+
+/*
+ * Find the directory rename, if any, that applies to 'path'.
+ *
+ * Each leading directory of 'path' is looked up in 'dir_renames',
+ * starting with the deepest one and moving towards the top level.
+ * Returns the first entry found, or NULL when none of them has one.
+ *
+ * The scratch copy of 'path' lives on the heap rather than in a
+ * fixed PATH_MAX stack buffer, so an overlong path cannot overflow it.
+ */
+static struct dir_rename_entry *check_dir_renamed(const char *path,
+						  struct hashmap *dir_renames)
+{
+	char *temp = xstrdup(path);
+	char *end;
+	struct dir_rename_entry *entry = NULL;
+
+	/* Chop off one trailing path component per iteration */
+	while ((end = strrchr(temp, '/'))) {
+		*end = '\0';
+		entry = dir_rename_find_entry(dir_renames, temp);
+		if (entry)
+			break;
+	}
+	free(temp);
+	return entry;
+}
+
+/*
+ * Fill 'collisions' with one collision_entry per path that an added or
+ * renamed file in 'pairs' would end up at once the directory renames in
+ * 'dir_renames' are applied.  Each entry's source_files list collects
+ * the original paths (pair->two->path) that map to that target.
+ */
+static void compute_collisions(struct hashmap *collisions,
+			       struct hashmap *dir_renames,
+			       struct diff_queue_struct *pairs)
+{
+	int i;
+
+	/*
+	 * Multiple files can be mapped to the same path due to directory
+	 * renames done by the other side of history.  Since that other
+	 * side of history could have merged multiple directories into one,
+	 * if our side of history added the same file basename to each of
+	 * those directories, then all N of them would get implicitly
+	 * renamed by the directory rename detection into the same path,
+	 * and we'd get an add/add/.../add conflict, and all those adds
+	 * from *this* side of history.  This is not representable in the
+	 * index, and users aren't going to easily be able to make sense of
+	 * it.  So we need to provide a good warning about what's
+	 * happening, and fall back to no-directory-rename detection
+	 * behavior for those paths.
+	 *
+	 * See testcases 9e and all of section 5 from t6043 for examples.
+	 */
+	collision_init(collisions);
+
+	for (i = 0; i < pairs->nr; ++i) {
+		struct dir_rename_entry *dir_rename_ent;
+		struct collision_entry *collision_ent;
+		char *new_path;
+		struct diff_filepair *pair = pairs->queue[i];
+
+		if (pair->status != 'A' && pair->status != 'R')
+			continue;
+		dir_rename_ent = check_dir_renamed(pair->two->path,
+						   dir_renames);
+		if (!dir_rename_ent)
+			continue;
+
+		new_path = apply_dir_rename(dir_rename_ent, pair->two->path);
+		if (!new_path)
+			/*
+			 * dir_rename_ent->non_unique_new_dir is true, which
+			 * means there is no directory rename for us to use,
+			 * which means it won't cause us any additional
+			 * collisions.
+			 */
+			continue;
+		collision_ent = collision_find_entry(collisions, new_path);
+		if (!collision_ent) {
+			collision_ent = xcalloc(1,
+						sizeof(struct collision_entry));
+			hashmap_entry_init(collision_ent, strhash(new_path));
+			/*
+			 * Set the key before inserting: the map's comparison
+			 * function dereferences target_file, so the entry
+			 * must never be visible to it with a NULL key.
+			 */
+			collision_ent->target_file = new_path;
+			hashmap_put(collisions, collision_ent);
+		} else {
+			free(new_path);
+		}
+		string_list_insert(&collision_ent->source_files,
+				   pair->two->path);
+	}
+}
+
+static char *check_for_directory_rename(struct merge_options *o,
+ const char *path,
+ struct tree *tree,
+ struct hashmap *dir_renames,