summaryrefslogtreecommitdiff
path: root/merge-ort.c
diff options
context:
space:
mode:
Diffstat (limited to 'merge-ort.c')
-rw-r--r--merge-ort.c704
1 files changed, 621 insertions, 83 deletions
diff --git a/merge-ort.c b/merge-ort.c
index 5e118a85ee..b954f7184a 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -18,6 +18,7 @@
#include "merge-ort.h"
#include "alloc.h"
+#include "attr.h"
#include "blob.h"
#include "cache-tree.h"
#include "commit.h"
@@ -25,6 +26,7 @@
#include "diff.h"
#include "diffcore.h"
#include "dir.h"
+#include "entry.h"
#include "ll-merge.h"
#include "object-store.h"
#include "revision.h"
@@ -51,6 +53,8 @@ enum merge_side {
MERGE_SIDE2 = 2
};
+static unsigned RESULT_INITIALIZED = 0x1abe11ed; /* unlikely accidental value */
+
struct traversal_callback_data {
unsigned long mask;
unsigned long dirmask;
@@ -73,8 +77,12 @@ struct rename_info {
/*
* dirs_removed: directories removed on a given side of history.
+ *
+ * The keys of dirs_removed[side] are the directories that were removed
+ * on the given side of history. The value of the strintmap for each
+ * directory is a value from enum dir_rename_relevance.
*/
- struct strset dirs_removed[3];
+ struct strintmap dirs_removed[3];
/*
* dir_rename_count: tracking where parts of a directory were renamed to
@@ -95,18 +103,20 @@ struct rename_info {
struct strmap dir_renames[3];
/*
- * relevant_sources: deleted paths for which we need rename detection
+ * relevant_sources: deleted paths wanted in rename detection, and why
*
* relevant_sources is a set of deleted paths on each side of
* history for which we need rename detection. If a path is deleted
* on one side of history, we need to detect if it is part of a
* rename if either
- * * we need to detect renames for an ancestor directory
* * the file is modified/deleted on the other side of history
+ * * we need to detect renames for an ancestor directory
* If neither of those are true, we can skip rename detection for
- * that path.
+ * that path. The reason is stored as a value from enum
+ * file_rename_relevance, as the reason can inform the algorithm in
+ * diffcore_rename_extended().
*/
- struct strset relevant_sources[3];
+ struct strintmap relevant_sources[3];
/*
* dir_rename_mask:
@@ -133,6 +143,72 @@ struct rename_info {
char *callback_data_traverse_path;
/*
+ * merge_trees: trees passed to the merge algorithm for the merge
+ *
+ * merge_trees records the trees passed to the merge algorithm. But,
+ * this data also is stored in merge_result->priv. If a sequence of
+ * merges are being done (such as when cherry-picking or rebasing),
+ * the next merge can look at this and re-use information from
+ * previous merges under certain circumstances.
+ *
+ * See also all the cached_* variables.
+ */
+ struct tree *merge_trees[3];
+
+ /*
+ * cached_pairs_valid_side: which side's cached info can be reused
+ *
+ * See the description for merge_trees. For repeated merges, at most
+ * only one side's cached information can be used. Valid values:
+ * MERGE_SIDE2: cached data from side2 can be reused
+ * MERGE_SIDE1: cached data from side1 can be reused
+ * 0: no cached data can be reused
+ */
+ int cached_pairs_valid_side;
+
+ /*
+ * cached_pairs: Caching of renames and deletions.
+ *
+ * These are mappings recording renames and deletions of individual
+ * files (not directories). They are thus a map from an old
+ * filename to either NULL (for deletions) or a new filename (for
+ * renames).
+ */
+ struct strmap cached_pairs[3];
+
+ /*
+ * cached_target_names: just the destinations from cached_pairs
+ *
+ * We sometimes want a fast lookup to determine if a given filename
+ * is one of the destinations in cached_pairs. cached_target_names
+ * is thus duplicative information, but it provides a fast lookup.
+ */
+ struct strset cached_target_names[3];
+
+ /*
+ * cached_irrelevant: Caching of rename_sources that aren't relevant.
+ *
+ * If we try to detect a rename for a source path and succeed, it's
+ * part of a rename. If we try to detect a rename for a source path
+ * and fail, then it's a delete. If we do not try to detect a rename
+ * for a path, then we don't know if it's a rename or a delete. If
+ * merge-ort doesn't think the path is relevant, then we just won't
+ * cache anything for that path. But there's a slight problem in
+ * that merge-ort can think a path is RELEVANT_LOCATION, but due to
+ * commit 9bd342137e ("diffcore-rename: determine which
+ * relevant_sources are no longer relevant", 2021-03-13),
+ * diffcore-rename can downgrade the path to RELEVANT_NO_MORE. To
+ * avoid excessive calls to diffcore_rename_extended() we still need
+ * to cache such paths, though we cannot record them as either
+ * renames or deletes. So we cache them here as a "turned out to be
+ * irrelevant *for this commit*" as they are often also irrelevant
+ * for subsequent commits, though we will have to do some extra
+ * checking to see whether such paths become relevant for rename
+ * detection when cherry-picking/rebasing subsequent commits.
+ */
+ struct strset cached_irrelevant[3];
+
+ /*
* needed_limit: value needed for inexact rename detection to run
*
* If the current rename limit wasn't high enough for inexact
@@ -215,6 +291,16 @@ struct merge_options_internal {
struct rename_info renames;
/*
+ * attr_index: hacky minimal index used for renormalization
+ *
+ * renormalization code _requires_ an index, though it only needs to
+ * find a .gitattributes file within the index. So, when
+ * renormalization is important, we create a special index with just
+ * that one file.
+ */
+ struct index_state attr_index;
+
+ /*
* current_dir_name, toplevel_dir: temporary vars
*
* These are used in collect_merge_info_callback(), and will set the
@@ -362,6 +448,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
int i;
void (*strmap_func)(struct strmap *, int) =
reinitialize ? strmap_partial_clear : strmap_clear;
+ void (*strintmap_func)(struct strintmap *) =
+ reinitialize ? strintmap_partial_clear : strintmap_clear;
void (*strset_func)(struct strset *) =
reinitialize ? strset_partial_clear : strset_clear;
@@ -393,18 +481,27 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
string_list_clear(&opti->paths_to_free, 0);
opti->paths_to_free.strdup_strings = 0;
+ if (opti->attr_index.cache_nr) /* true iff opt->renormalize */
+ discard_index(&opti->attr_index);
+
/* Free memory used by various renames maps */
for (i = MERGE_SIDE1; i <= MERGE_SIDE2; ++i) {
- strset_func(&renames->dirs_removed[i]);
-
- partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
- if (!reinitialize)
- strmap_clear(&renames->dir_rename_count[i], 1);
-
+ strintmap_func(&renames->dirs_removed[i]);
strmap_func(&renames->dir_renames[i], 0);
-
- strset_func(&renames->relevant_sources[i]);
+ strintmap_func(&renames->relevant_sources[i]);
+ if (!reinitialize)
+ assert(renames->cached_pairs_valid_side == 0);
+ if (i != renames->cached_pairs_valid_side) {
+ strset_func(&renames->cached_target_names[i]);
+ strmap_func(&renames->cached_pairs[i], 1);
+ strset_func(&renames->cached_irrelevant[i]);
+ partial_clear_dir_rename_count(&renames->dir_rename_count[i]);
+ if (!reinitialize)
+ strmap_clear(&renames->dir_rename_count[i], 1);
+ }
}
+ renames->cached_pairs_valid_side = 0;
+ renames->dir_rename_mask = 0;
if (!reinitialize) {
struct hashmap_iter iter;
@@ -427,8 +524,6 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
strmap_clear(&opti->output, 0);
}
- renames->dir_rename_mask = 0;
-
/* Clean out callback_data as well. */
FREE_AND_NULL(renames->callback_data);
renames->callback_data_nr = renames->callback_data_alloc = 0;
@@ -669,12 +764,48 @@ static void add_pair(struct merge_options *opt,
struct rename_info *renames = &opt->priv->renames;
int names_idx = is_add ? side : 0;
- if (!is_add) {
+ if (is_add) {
+ if (strset_contains(&renames->cached_target_names[side],
+ pathname))
+ return;
+ } else {
unsigned content_relevant = (match_mask == 0);
unsigned location_relevant = (dir_rename_mask == 0x07);
- if (content_relevant || location_relevant)
- strset_add(&renames->relevant_sources[side], pathname);
+ /*
+ * If pathname is found in cached_irrelevant[side] due to
+ * previous pick but for this commit content is relevant,
+ * then we need to remove it from cached_irrelevant.
+ */
+ if (content_relevant)
+ /* strset_remove is no-op if strset doesn't have key */
+ strset_remove(&renames->cached_irrelevant[side],
+ pathname);
+
+ /*
+ * We do not need to re-detect renames for paths that we already
+ * know the pairing, i.e. for cached_pairs (or
+ * cached_irrelevant). However, handle_deferred_entries() needs
+ * to loop over the union of keys from relevant_sources[side] and
+ * cached_pairs[side], so for simplicity we set relevant_sources
+ * for all the cached_pairs too and then strip them back out in
+ * prune_cached_from_relevant() at the beginning of
+ * detect_regular_renames().
+ */
+ if (content_relevant || location_relevant) {
+ /* content_relevant trumps location_relevant */
+ strintmap_set(&renames->relevant_sources[side], pathname,
+ content_relevant ? RELEVANT_CONTENT : RELEVANT_LOCATION);
+ }
+
+ /*
+ * Avoid creating pair if we've already cached rename results.
+ * Note that we do this after setting relevant_sources[side]
+ * as noted in the comment above.
+ */
+ if (strmap_contains(&renames->cached_pairs[side], pathname) ||
+ strset_contains(&renames->cached_irrelevant[side], pathname))
+ return;
}
one = alloc_filespec(pathname);
@@ -729,10 +860,41 @@ static void collect_rename_info(struct merge_options *opt,
if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
/* absent_mask = 0x07 - dirmask; sides = absent_mask/2 */
unsigned sides = (0x07 - dirmask)/2;
+ unsigned relevance = (renames->dir_rename_mask == 0x07) ?
+ RELEVANT_FOR_ANCESTOR : NOT_RELEVANT;
+ /*
+ * Record relevance of this directory. However, note that
+ * when collect_merge_info_callback() recurses into this
+ * directory and calls collect_rename_info() on paths
+ * within that directory, if we find a path that was added
+ * to this directory on the other side of history, we will
+ * upgrade this value to RELEVANT_FOR_SELF; see below.
+ */
if (sides & 1)
- strset_add(&renames->dirs_removed[1], fullname);
+ strintmap_set(&renames->dirs_removed[1], fullname,
+ relevance);
if (sides & 2)
- strset_add(&renames->dirs_removed[2], fullname);
+ strintmap_set(&renames->dirs_removed[2], fullname,
+ relevance);
+ }
+
+ /*
+ * Here's the block that potentially upgrades to RELEVANT_FOR_SELF.
+ * When we run across a file added to a directory. In such a case,
+ * find the directory of the file and upgrade its relevance.
+ */
+ if (renames->dir_rename_mask == 0x07 &&
+ (filemask == 2 || filemask == 4)) {
+ /*
+ * Need directory rename for parent directory on other side
+ * of history from added file. Thus
+ * side = (~filemask & 0x06) >> 1
+ * or
+ * side = 3 - (filemask/2).
+ */
+ unsigned side = 3 - (filemask >> 1);
+ strintmap_set(&renames->dirs_removed[side], dirname,
+ RELEVANT_FOR_SELF);
}
if (filemask == 0 || filemask == 7)
@@ -1147,6 +1309,63 @@ static int merge_submodule(struct merge_options *opt,
return 0;
}
+static void initialize_attr_index(struct merge_options *opt)
+{
+ /*
+ * The renormalize_buffer() functions require attributes, and
+ * annoyingly those can only be read from the working tree or from
+ * an index_state. merge-ort doesn't have an index_state, so we
+ * generate a fake one containing only attribute information.
+ */
+ struct merged_info *mi;
+ struct index_state *attr_index = &opt->priv->attr_index;
+ struct cache_entry *ce;
+
+ attr_index->initialized = 1;
+
+ if (!opt->renormalize)
+ return;
+
+ mi = strmap_get(&opt->priv->paths, GITATTRIBUTES_FILE);
+ if (!mi)
+ return;
+
+ if (mi->clean) {
+ int len = strlen(GITATTRIBUTES_FILE);
+ ce = make_empty_cache_entry(attr_index, len);
+ ce->ce_mode = create_ce_mode(mi->result.mode);
+ ce->ce_flags = create_ce_flags(0);
+ ce->ce_namelen = len;
+ oidcpy(&ce->oid, &mi->result.oid);
+ memcpy(ce->name, GITATTRIBUTES_FILE, len);
+ add_index_entry(attr_index, ce,
+ ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
+ get_stream_filter(attr_index, GITATTRIBUTES_FILE, &ce->oid);
+ } else {
+ int stage, len;
+ struct conflict_info *ci;
+
+ ASSIGN_AND_VERIFY_CI(ci, mi);
+ for (stage = 0; stage < 3; stage++) {
+ unsigned stage_mask = (1 << stage);
+
+ if (!(ci->filemask & stage_mask))
+ continue;
+ len = strlen(GITATTRIBUTES_FILE);
+ ce = make_empty_cache_entry(attr_index, len);
+ ce->ce_mode = create_ce_mode(ci->stages[stage].mode);
+ ce->ce_flags = create_ce_flags(stage);
+ ce->ce_namelen = len;
+ oidcpy(&ce->oid, &ci->stages[stage].oid);
+ memcpy(ce->name, GITATTRIBUTES_FILE, len);
+ add_index_entry(attr_index, ce,
+ ADD_CACHE_OK_TO_ADD | ADD_CACHE_OK_TO_REPLACE);
+ get_stream_filter(attr_index, GITATTRIBUTES_FILE,
+ &ce->oid);
+ }
+ }
+}
+
static int merge_3way(struct merge_options *opt,
const char *path,
const struct object_id *o,
@@ -1161,6 +1380,9 @@ static int merge_3way(struct merge_options *opt,
char *base, *name1, *name2;
int merge_status;
+ if (!opt->priv->attr_index.initialized)
+ initialize_attr_index(opt);
+
ll_opts.renormalize = opt->renormalize;
ll_opts.extra_marker_size = extra_marker_size;
ll_opts.xdl_opts = opt->xdl_opts;
@@ -1199,7 +1421,7 @@ static int merge_3way(struct merge_options *opt,
merge_status = ll_merge(result_buf, path, &orig, base,
&src1, name1, &src2, name2,
- opt->repo->index, &ll_opts);
+ &opt->priv->attr_index, &ll_opts);
free(base);
free(name1);
@@ -1291,7 +1513,7 @@ static int handle_content_merge(struct merge_options *opt,
two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
merge_status = merge_3way(opt, path,
- two_way ? &null_oid : &o->oid,
+ two_way ? null_oid() : &o->oid,
&a->oid, &b->oid,
pathnames, extra_marker_size,
&result_buf);
@@ -1313,7 +1535,7 @@ static int handle_content_merge(struct merge_options *opt,
} else if (S_ISGITLINK(a->mode)) {
int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode));
clean = merge_submodule(opt, pathnames[0],
- two_way ? &null_oid : &o->oid,
+ two_way ? null_oid() : &o->oid,
&a->oid, &b->oid, &result->oid);
if (opt->priv->call_depth && two_way && !clean) {
result->mode = o->mode;
@@ -1511,6 +1733,9 @@ static void get_provisional_directory_renames(struct merge_options *opt,
}
}
+ if (max == 0)
+ continue;
+
if (bad_max == max) {
path_msg(opt, source_dir, 0,
_("CONFLICT (directory rename split): "
@@ -1519,18 +1744,7 @@ static void get_provisional_directory_renames(struct merge_options *opt,
"no destination getting a majority of the "
"files."),
source_dir);
- /*
- * We should mark this as unclean IF something attempts
- * to use this rename. We do not yet have the logic
- * in place to detect if this directory rename is being
- * used, and optimizations that reduce the number of
- * renames cause this to falsely trigger. For now,
- * just disable it, causing t6423 testcase 2a to break.
- * We'll later fix the detection, and when we do we
- * will re-enable setting *clean to 0 (and thereby fix
- * t6423 testcase 2a).
- */
- /* *clean = 0; */
+ *clean = 0;
} else {
strmap_put(&renames->dir_renames[side],
source_dir, (void*)best);
@@ -1930,6 +2144,9 @@ static int process_renames(struct merge_options *opt,
VERIFY_CI(side2);
if (!strcmp(pathnames[1], pathnames[2])) {
+ struct rename_info *ri = &opt->priv->renames;
+ int j;
+
/* Both sides renamed the same way */
assert(side1 == side2);
memcpy(&side1->stages[0], &base->stages[0],
@@ -1939,6 +2156,16 @@ static int process_renames(struct merge_options *opt,
base->merged.is_null = 1;
base->merged.clean = 1;
+ /*
+ * Disable remembering renames optimization;
+ * rename/rename(1to1) is incredibly rare, and
+ * just disabling the optimization is easier
+ * than purging cached_pairs,
+ * cached_target_names, and dir_rename_counts.
+ */
+ for (j = 0; j < 3; j++)
+ ri->merge_trees[j] = NULL;
+
/* We handled both renames, i.e. i+1 handled */
i++;
/* Move to next rename */
@@ -2123,7 +2350,7 @@ static int process_renames(struct merge_options *opt,
if (type_changed) {
/* rename vs. typechange */
/* Mark the original as resolved by removal */
- memcpy(&oldinfo->stages[0].oid, &null_oid,
+ memcpy(&oldinfo->stages[0].oid, null_oid(),
sizeof(oldinfo->stages[0].oid));
oldinfo->stages[0].mode = 0;
oldinfo->filemask &= 0x06;
@@ -2160,13 +2387,15 @@ static inline int possible_side_renames(struct rename_info *renames,
unsigned side_index)
{
return renames->pairs[side_index].nr > 0 &&
- !strset_empty(&renames->relevant_sources[side_index]);
+ !strintmap_empty(&renames->relevant_sources[side_index]);
}
static inline int possible_renames(struct rename_info *renames)
{
return possible_side_renames(renames, 1) ||
- possible_side_renames(renames, 2);
+ possible_side_renames(renames, 2) ||
+ !strmap_empty(&renames->cached_pairs[1]) ||
+ !strmap_empty(&renames->cached_pairs[2]);
}
static void resolve_diffpair_statuses(struct diff_queue_struct *q)
@@ -2190,6 +2419,112 @@ static void resolve_diffpair_statuses(struct diff_queue_struct *q)
}
}
+static void prune_cached_from_relevant(struct rename_info *renames,
+ unsigned side)
+{
+ /* Reason for this function described in add_pair() */
+ struct hashmap_iter iter;
+ struct strmap_entry *entry;
+
+ /* Remove from relevant_sources all entries in cached_pairs[side] */
+ strmap_for_each_entry(&renames->cached_pairs[side], &iter, entry) {
+ strintmap_remove(&renames->relevant_sources[side],
+ entry->key);
+ }
+ /* Remove from relevant_sources all entries in cached_irrelevant[side] */
+ strset_for_each_entry(&renames->cached_irrelevant[side], &iter, entry) {
+ strintmap_remove(&renames->relevant_sources[side],
+ entry->key);
+ }
+}
+
+static void use_cached_pairs(struct merge_options *opt,
+ struct strmap *cached_pairs,
+ struct diff_queue_struct *pairs)
+{
+ struct hashmap_iter iter;
+ struct strmap_entry *entry;
+
+ /*
+ * Add to side_pairs all entries from renames->cached_pairs[side_index].
+ * (Info in cached_irrelevant[side_index] is not relevant here.)
+ */
+ strmap_for_each_entry(cached_pairs, &iter, entry) {
+ struct diff_filespec *one, *two;
+ const char *old_name = entry->key;
+ const char *new_name = entry->value;
+ if (!new_name)
+ new_name = old_name;
+
+ /* We don't care about oid/mode, only filenames and status */
+ one = alloc_filespec(old_name);
+ two = alloc_filespec(new_name);
+ diff_queue(pairs, one, two);
+ pairs->queue[pairs->nr-1]->status = entry->value ? 'R' : 'D';
+ }
+}
+
+static void cache_new_pair(struct rename_info *renames,
+ int side,
+ char *old_path,
+ char *new_path,
+ int free_old_value)
+{
+ char *old_value;
+ new_path = xstrdup(new_path);
+ old_value = strmap_put(&renames->cached_pairs[side],
+ old_path, new_path);
+ strset_add(&renames->cached_target_names[side], new_path);
+ if (free_old_value)
+ free(old_value);
+ else
+ assert(!old_value);
+}
+
+static void possibly_cache_new_pair(struct rename_info *renames,
+ struct diff_filepair *p,
+ unsigned side,
+ char *new_path)
+{
+ int dir_renamed_side = 0;
+
+ if (new_path) {
+ /*
+ * Directory renames happen on the other side of history from
+ * the side that adds new files to the old directory.
+ */
+ dir_renamed_side = 3 - side;
+ } else {
+ int val = strintmap_get(&renames->relevant_sources[side],
+ p->one->path);
+ if (val == RELEVANT_NO_MORE) {
+ assert(p->status == 'D');
+ strset_add(&renames->cached_irrelevant[side],
+ p->one->path);
+ }
+ if (val <= 0)
+ return;
+ }
+
+ if (p->status == 'D') {
+ /*
+ * If we already had this delete, we'll just set it's value
+ * to NULL again, so no harm.
+ */
+ strmap_put(&renames->cached_pairs[side], p->one->path, NULL);
+ } else if (p->status == 'R') {
+ if (!new_path)
+ new_path = p->two->path;
+ else
+ cache_new_pair(renames, dir_renamed_side,
+ p->two->path, new_path, 0);
+ cache_new_pair(renames, side, p->one->path, new_path, 1);
+ } else if (p->status == 'A' && new_path) {
+ cache_new_pair(renames, dir_renamed_side,
+ p->two->path, new_path, 0);
+ }
+}
+
static int compare_pairs(const void *a_, const void *b_)
{
const struct diff_filepair *a = *((const struct diff_filepair **)a_);
@@ -2205,6 +2540,7 @@ static void detect_regular_renames(struct merge_options *opt,
struct diff_options diff_opts;
struct rename_info *renames = &opt->priv->renames;
+ prune_cached_from_relevant(renames, side_index);
if (!possible_side_renames(renames, side_index)) {
/*
* No rename detection needed for this side, but we still need
@@ -2215,6 +2551,7 @@ static void detect_regular_renames(struct merge_options *opt,
return;
}
+ partial_clear_dir_rename_count(&renames->dir_rename_count[side_index]);
repo_diff_setup(opt->repo, &diff_opts);
diff_opts.flags.recursive = 1;
diff_opts.flags.rename_empty = 0;
@@ -2232,7 +2569,8 @@ static void detect_regular_renames(struct merge_options *opt,
diffcore_rename_extended(&diff_opts,
&renames->relevant_sources[side_index],
&renames->dirs_removed[side_index],
- &renames->dir_rename_count[side_index]);
+ &renames->dir_rename_count[side_index],
+ &renames->cached_pairs[side_index]);
trace2_region_leave("diff", "diffcore_rename", opt->repo);
resolve_diffpair_statuses(&diff_queued_diff);
@@ -2272,6 +2610,7 @@ static int collect_renames(struct merge_options *opt,
char *new_path; /* non-NULL only with directory renames */
if (p->status != 'A' && p->status != 'R') {
+ possibly_cache_new_pair(renames, p, side_index, NULL);
diff_free_filepair(p);
continue;
}
@@ -2283,6 +2622,7 @@ static int collect_renames(struct merge_options *opt,
&collisions,
&clean);
+ possibly_cache_new_pair(renames, p, side_index, new_path);
if (p->status != 'R' && !new_path) {
diff_free_filepair(p);
continue;
@@ -2338,6 +2678,8 @@ static int detect_and_process_renames(struct merge_options *opt,
trace2_region_enter("merge", "regular renames", opt->repo);
detect_regular_renames(opt, MERGE_SIDE1);
detect_regular_renames(opt, MERGE_SIDE2);
+ use_cached_pairs(opt, &renames->cached_pairs[1], &renames->pairs[1]);
+ use_cached_pairs(opt, &renames->cached_pairs[2], &renames->pairs[2]);
trace2_region_leave("merge", "regular renames", opt->repo);
trace2_region_enter("merge", "directory renames", opt->repo);
@@ -2361,7 +2703,7 @@ static int detect_and_process_renames(struct merge_options *opt,
clean &= collect_renames(opt, &combined, MERGE_SIDE2,
&renames->dir_renames[1],
&renames->dir_renames[2]);
- QSORT(combined.queue, combined.nr, compare_pairs);
+ STABLE_QSORT(combined.queue, combined.nr, compare_pairs);
trace2_region_leave("merge", "directory renames", opt->repo);
trace2_region_enter("merge", "process renames", opt->repo);
@@ -2431,6 +2773,61 @@ static int string_list_df_name_compare(const char *one, const char *two)
return onelen - twolen;
}
+static int read_oid_strbuf(struct merge_options *opt,
+ const struct object_id *oid,
+ struct strbuf *dst)
+{
+ void *buf;
+ enum object_type type;
+ unsigned long size;
+ buf = read_object_file(oid, &type, &size);
+ if (!buf)
+ return err(opt, _("cannot read object %s"), oid_to_hex(oid));
+ if (type != OBJ_BLOB) {
+ free(buf);
+ return err(opt, _("object %s is not a blob"), oid_to_hex(oid));
+ }
+ strbuf_attach(dst, buf, size, size + 1);
+ return 0;
+}
+
+static int blob_unchanged(struct merge_options *opt,
+ const struct version_info *base,
+ const struct version_info *side,
+ const char *path)
+{
+ struct strbuf basebuf = STRBUF_INIT;
+ struct strbuf sidebuf = STRBUF_INIT;
+ int ret = 0; /* assume changed for safety */
+ struct index_state *idx = &opt->priv->attr_index;
+
+ if (!idx->initialized)
+ initialize_attr_index(opt);
+
+ if (base->mode != side->mode)
+ return 0;
+ if (oideq(&base->oid, &side->oid))
+ return 1;
+
+ if (read_oid_strbuf(opt, &base->oid, &basebuf) ||
+ read_oid_strbuf(opt, &side->oid, &sidebuf))
+ goto error_return;
+ /*
+ * Note: binary | is used so that both renormalizations are
+ * performed. Comparison can be skipped if both files are
+ * unchanged since their sha1s have already been compared.
+ */
+ if (renormalize_buffer(idx, path, basebuf.buf, basebuf.len, &basebuf) |
+ renormalize_buffer(idx, path, sidebuf.buf, sidebuf.len, &sidebuf))
+ ret = (basebuf.len == sidebuf.len &&
+ !memcmp(basebuf.buf, sidebuf.buf, basebuf.len));
+
+error_return:
+ strbuf_release(&basebuf);
+ strbuf_release(&sidebuf);
+ return ret;
+}
+
struct directory_versions {
/*
* versions: list of (basename -> version_info)
@@ -2491,22 +2888,15 @@ static void write_tree(struct object_id *result_oid,
size_t hash_size)
{
size_t maxlen = 0, extra;
- unsigned int nr = versions->nr - offset;
+ unsigned int nr;
struct strbuf buf = STRBUF_INIT;
- struct string_list relevant_entries = STRING_LIST_INIT_NODUP;
int i;
- /*
- * We want to sort the last (versions->nr-offset) entries in versions.
- * Do so by abusing the string_list API a bit: make another string_list
- * that contains just those entries and then sort them.
- *
- * We won't use relevant_entries again and will let it just pop off the
- * stack, so there won't be allocation worries or anything.
- */
- relevant_entries.items = versions->items + offset;
- relevant_entries.nr = versions->nr - offset;
- QSORT(relevant_entries.items, relevant_entries.nr, tree_entry_order);
+ assert(offset <= versions->nr);
+ nr = versions->nr - offset;
+ if (versions->nr)
+ /* No need for STABLE_QSORT -- filenames must be unique */
+ QSORT(versions->items + offset, nr, tree_entry_order);
/* Pre-allocate some space in buf */
extra = hash_size + 8; /* 8: 6 for mode, 1 for space, 1 for NUL char */
@@ -2762,7 +3152,7 @@ static void process_entry(struct merge_options *opt,
if (ci->filemask & (1 << i))
continue;
ci->stages[i].mode = 0;
- oidcpy(&ci->stages[i].oid, &null_oid);
+ oidcpy(&ci->stages[i].oid, null_oid());
}
} else if (ci->df_conflict && ci->merged.result.mode != 0) {
/*
@@ -2808,7 +3198,7 @@ static void process_entry(struct merge_options *opt,
continue;
/* zero out any entries related to directories */
new_ci->stages[i].mode = 0;
- oidcpy(&new_ci->stages[i].oid, &null_oid);
+ oidcpy(&new_ci->stages[i].oid, null_oid());
}
/*
@@ -2895,12 +3285,21 @@ static void process_entry(struct merge_options *opt,
rename_b = 1;
}
- path_msg(opt, path, 0,
- _("CONFLICT (distinct types): %s had different "
- "types on each side; renamed %s of them so "
- "each can be recorded somewhere."),
- path,
- (rename_a && rename_b) ? _("both") : _("one"));
+ if (rename_a && rename_b) {
+ path_msg(opt, path, 0,
+ _("CONFLICT (distinct types): %s had "
+ "different types on each side; "
+ "renamed both of them so each can "
+ "be recorded somewhere."),
+ path);
+ } else {
+ path_msg(opt, path, 0,
+ _("CONFLICT (distinct types): %s had "
+ "different types on each side; "
+ "renamed one of them so each can be "
+ "recorded somewhere."),
+ path);
+ }
ci->merged.clean = 0;
memcpy(new_ci, ci, sizeof(*new_ci));
@@ -2909,11 +3308,11 @@ static void process_entry(struct merge_options *opt,
new_ci->merged.result.mode = ci->stages[2].mode;
oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid);
new_ci->stages[1].mode = 0;
- oidcpy(&new_ci->stages[1].oid, &null_oid);
+ oidcpy(&new_ci->stages[1].oid, null_oid());
new_ci->filemask = 5;
if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) {
new_ci->stages[0].mode = 0;
- oidcpy(&new_ci->stages[0].oid, &null_oid);
+ oidcpy(&new_ci->stages[0].oid, null_oid());
new_ci->filemask = 4;
}
@@ -2921,11 +3320,11 @@ static void process_entry(struct merge_options *opt,
ci->merged.result.mode = ci->stages[1].mode;
oidcpy(&ci->merged.result.oid, &ci->stages[1].oid);
ci->stages[2].mode = 0;
- oidcpy(&ci->stages[2].oid, &null_oid);
+ oidcpy(&ci->stages[2].oid, null_oid());
ci->filemask = 3;
if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) {
ci->stages[0].mode = 0;
- oidcpy(&ci->stages[0].oid, &null_oid);
+ oidcpy(&ci->stages[0].oid, null_oid());
ci->filemask = 2;
}
@@ -3017,8 +3416,13 @@ static void process_entry(struct merge_options *opt,
modify_branch = (side == 1) ? opt->branch1 : opt->branch2;
delete_branch = (side == 1) ? opt->branch2 : opt->branch1;
- if (ci->path_conflict &&
- oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
+ if (opt->renormalize &&
+ blob_unchanged(opt, &ci->stages[0], &ci->stages[side],
+ path)) {
+ ci->merged.is_null = 1;
+ ci->merged.clean = 1;
+ } else if (ci->path_conflict &&
+ oideq(&ci->stages[0].oid, &ci->stages[side].oid)) {
/*
* This came from a rename/delete; no action to take,
* but avoid printing "modify/delete" conflict notice
@@ -3042,7 +3446,7 @@ static void process_entry(struct merge_options *opt,
/* Deleted on both sides */
ci->merged.is_null = 1;
ci->merged.result.mode = 0;
- oidcpy(&ci->merged.result.oid, &null_oid);
+ oidcpy(&ci->merged.result.oid, null_oid());
ci->merged.clean = !ci->path_conflict;
}
@@ -3190,23 +3594,27 @@ static int checkout(struct merge_options *opt,
return ret;
}
-static int record_conflicted_index_entries(struct merge_options *opt,
- struct index_state *index,
- struct strmap *paths,
- struct strmap *conflicted)
+static int record_conflicted_index_entries(struct merge_options *opt)
{
struct hashmap_iter iter;
struct strmap_entry *e;
+ struct index_state *index = opt->repo->index;
+ struct checkout state = CHECKOUT_INIT;
int errs = 0;
int original_cache_nr;
- if (strmap_empty(conflicted))
+ if (strmap_empty(&opt->priv->conflicted))
return 0;
+ /* If any entries have skip_worktree set, we'll have to check 'em out */
+ state.force = 1;
+ state.quiet = 1;
+ state.refresh_cache = 1;
+ state.istate = index;
original_cache_nr = index->cache_nr;
/* Put every entry from paths into plist, then sort */
- strmap_for_each_entry(conflicted, &iter, e) {
+ strmap_for_each_entry(&opt->priv->conflicted, &iter, e) {
const char *path = e->key;
struct conflict_info *ci = e->value;
int pos;
@@ -3247,9 +3655,23 @@ static int record_conflicted_index_entries(struct merge_options *opt,
* the higher order stages. Thus, we need override
* the CE_SKIP_WORKTREE bit and manually write those
* files to the working disk here.
- *
- * TODO: Implement this CE_SKIP_WORKTREE fixup.
*/
+ if (ce_skip_worktree(ce)) {
+ struct stat st;
+
+ if (!lstat(path, &st)) {
+ char *new_name = unique_path(&opt->priv->paths,
+ path,
+ "cruft");
+
+ path_msg(opt, path, 1,
+ _("Note: %s not up to date and in way of checking out conflicted version; old copy renamed to %s"),
+ path, new_name);
+ errs |= rename(path, new_name);
+ free(new_name);
+ }
+ errs |= checkout_entry(ce, &state, NULL, NULL);
+ }
/*
* Mark this cache entry for removal and instead add
@@ -3281,6 +3703,11 @@ static int record_conflicted_index_entries(struct merge_options *opt,
* entries we added to the end into their right locations.
*/
remove_marked_cache_entries(index, 1);
+ /*
+ * No need for STABLE_QSORT -- cmp_cache_name_compare sorts primarily
+ * on filename and secondarily on stage, and (name, stage #) are a
+ * unique tuple.
+ */
QSORT(index->cache, index->cache_nr, cmp_cache_name_compare);
return errs;
@@ -3294,7 +3721,8 @@ void merge_switch_to_result(struct merge_options *opt,
{
assert(opt->priv == NULL);
if (result->clean >= 0 && update_worktree_and_index) {
- struct merge_options_internal *opti = result->priv;
+ const char *filename;
+ FILE *fp;
trace2_region_enter("merge", "checkout", opt->repo);
if (checkout(opt, head, result->tree)) {
@@ -3305,14 +3733,22 @@ void merge_switch_to_result(struct merge_options *opt,
trace2_region_leave("merge", "checkout", opt->repo);
trace2_region_enter("merge", "record_conflicted", opt->repo);
- if (record_conflicted_index_entries(opt, opt->repo->index,
- &opti->paths,
- &opti->conflicted)) {
+ opt->priv = result->priv;
+ if (record_conflicted_index_entries(opt)) {
/* failure to function */
+ opt->priv = NULL;
result->clean = -1;
return;
}
+ opt->priv = NULL;
trace2_region_leave("merge", "record_conflicted", opt->repo);
+
+ trace2_region_enter("merge", "write_auto_merge", opt->repo);
+ filename = git_path_auto_merge(opt->repo);
+ fp = xfopen(filename, "w");
+ fprintf(fp, "%s\n", oid_to_hex(&result->tree->object.oid));
+ fclose(fp);
+ trace2_region_leave("merge", "write_auto_merge", opt->repo);
}
if (display_update_msgs) {
@@ -3357,6 +3793,8 @@ void merge_finalize(struct merge_options *opt,
{
struct merge_options_internal *opti = result->priv;
+ if (opt->renormalize)
+ git_attr_set_direction(GIT_ATTR_CHECKIN);
assert(opt->priv == NULL);
clear_or_reinit_internal_opts(opti, 0);
@@ -3365,6 +3803,23 @@ void merge_finalize(struct merge_options *opt,
/*** Function Grouping: helper functions for merge_incore_*() ***/
+static struct tree *shift_tree_object(struct repository *repo,
+ struct tree *one, struct tree *two,
+ const char *subtree_shift)
+{
+ struct object_id shifted;
+
+ if (!*subtree_shift) {
+ shift_tree(repo, &one->object.oid, &two->object.oid, &shifted, 0);
+ } else {
+ shift_tree_by(repo, &one->object.oid, &two->object.oid, &shifted,
+ subtree_shift);
+ }
+ if (oideq(&two->object.oid, &shifted))
+ return two;
+ return lookup_tree(repo, &shifted);
+}
+
static inline void set_commit_tree(struct commit *c, struct tree *t)
{
c->maybe_tree = t;
@@ -3415,6 +3870,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
assert(opt->obuf.len == 0);
assert(opt->priv == NULL);
+ if (result->_properly_initialized != 0 &&
+ result->_properly_initialized != RESULT_INITIALIZED)
+ BUG("struct merge_result passed to merge_incore_*recursive() must be zeroed or filled with values from a previous run");
+ assert(!!result->priv == !!result->_properly_initialized);
if (result->priv) {
opt->priv = result->priv;
result->priv = NULL;
@@ -3432,6 +3891,10 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
/* Default to histogram diff. Actually, just hardcode it...for now. */
opt->xdl_opts = DIFF_WITH_ALG(opt, HISTOGRAM_DIFF);
+ /* Handle attr direction stuff for renormalization */
+ if (opt->renormalize)
+ git_attr_set_direction(GIT_ATTR_CHECKOUT);
+
/* Initialization of opt->priv, our internal merge data */
trace2_region_enter("merge", "allocate/init", opt->repo);
if (opt->priv) {
@@ -3444,13 +3907,27 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
/* Initialization of various renames fields */
renames = &opt->priv->renames;
for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) {
- strset_init_with_options(&renames->dirs_removed[i],
- NULL, 0);
+ strintmap_init_with_options(&renames->dirs_removed[i],
+ NOT_RELEVANT, NULL, 0);
strmap_init_with_options(&renames->dir_rename_count[i],
NULL, 1);
strmap_init_with_options(&renames->dir_renames[i],
NULL, 0);
- strset_init_with_options(&renames->relevant_sources[i],
+ /*
+ * relevant_sources uses -1 for the default, because we need
+ * to be able to distinguish not-in-strintmap from valid
+ * relevant_source values from enum file_rename_relevance.
+ * In particular, possibly_cache_new_pair() expects a negative
+ * value for not-found entries.
+ */
+ strintmap_init_with_options(&renames->relevant_sources[i],
+ -1 /* explicitly invalid */,
+ NULL, 0);
+ strmap_init_with_options(&renames->cached_pairs[i],
+ NULL, 1);
+ strset_init_with_options(&renames->cached_irrelevant[i],
+ NULL, 1);
+ strset_init_with_options(&renames->cached_target_names[i],
NULL, 0);
}
@@ -3477,6 +3954,50 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
trace2_region_leave("merge", "allocate/init", opt->repo);
}
+static void merge_check_renames_reusable(struct merge_options *opt,
+ struct merge_result *result,
+ struct tree *merge_base,
+ struct tree *side1,
+ struct tree *side2)
+{
+ struct rename_info *renames;
+ struct tree **merge_trees;
+ struct merge_options_internal *opti = result->priv;
+
+ if (!opti)
+ return;
+
+ renames = &opti->renames;
+ merge_trees = renames->merge_trees;
+
+ /*
+ * Handle case where previous merge operation did not want cache to
+ * take effect, e.g. because rename/rename(1to1) makes it invalid.
+ */
+ if (!merge_trees[0]) {
+ assert(!merge_trees[0] && !merge_trees[1] && !merge_trees[2]);
+ renames->cached_pairs_valid_side = 0; /* neither side valid */
+ return;
+ }
+
+ /*
+ * Handle other cases; note that merge_trees[0..2] will only
+ * be NULL if opti is, or if all three were manually set to
+ * NULL by e.g. rename/rename(1to1) handling.
+ */
+ assert(merge_trees[0] && merge_trees[1] && merge_trees[2]);
+
+ /* Check if we meet a condition for re-using cached_pairs */
+ if (oideq(&merge_base->object.oid, &merge_trees[2]->object.oid) &&
+ oideq(&side1->object.oid, &result->tree->object.oid))
+ renames->cached_pairs_valid_side = MERGE_SIDE1;
+ else if (oideq(&merge_base->object.oid, &merge_trees[1]->object.oid) &&
+ oideq(&side2->object.oid, &result->tree->object.oid))
+ renames->cached_pairs_valid_side = MERGE_SIDE2;
+ else
+ renames->cached_pairs_valid_side = 0; /* neither side valid */
+}
+
/*** Function Grouping: merge_incore_*() and their internal variants ***/
/*
@@ -3490,6 +4011,13 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
{
struct object_id working_tree_oid;
+ if (opt->subtree_shift) {
+ side2 = shift_tree_object(opt->repo, side1, side2,
+ opt->subtree_shift);
+ merge_base = shift_tree_object(opt->repo, side1, merge_base,
+ opt->subtree_shift);
+ }
+
trace2_region_enter("merge", "collect_merge_info", opt->repo);
if (collect_merge_info(opt, merge_base, side1, side2) != 0) {
/*
@@ -3520,6 +4048,7 @@ static void merge_ort_nonrecursive_internal(struct merge_options *opt,
result->clean &= strmap_empty(&opt->priv->conflicted);
if (!opt->priv->call_depth) {
result->priv = opt->priv;
+ result->_properly_initialized = RESULT_INITIALIZED;
opt->priv = NULL;
}
}
@@ -3617,7 +4146,16 @@ void merge_incore_nonrecursive(struct merge_options *opt,
trace2_region_enter("merge", "merge_start", opt->repo);
assert(opt->ancestor != NULL);
+ merge_check_renames_reusable(opt, result, merge_base, side1, side2);
merge_start(opt, result);
+ /*
+ * Record the trees used in this merge, so if there's a next merge in
+ * a cherry-pick or rebase sequence it might be able to take advantage
+ * of the cached_pairs in that next merge.
+ */
+ opt->priv->renames.merge_trees[0] = merge_base;
+ opt->priv->renames.merge_trees[1] = side1;
+ opt->priv->renames.merge_trees[2] = side2;
trace2_region_leave("merge", "merge_start", opt->repo);
merge_ort_nonrecursive_internal(opt, merge_base, side1, side2, result);