summary | refs | log | tree | commit | diff
path: root/diffcore-rename.c
diff options
context:
space:
mode:
Diffstat (limited to 'diffcore-rename.c')
-rw-r--r-- diffcore-rename.c | 187
1 files changed, 145 insertions, 42 deletions
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 963ca58221..c95857b51f 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -54,7 +54,7 @@ static void register_rename_src(struct diff_filepair *p)
if (p->broken_pair) {
if (!break_idx) {
break_idx = xmalloc(sizeof(*break_idx));
- strintmap_init(break_idx, -1);
+ strintmap_init_with_options(break_idx, -1, NULL, 0);
}
strintmap_set(break_idx, p->one->path, rename_dst_nr);
}
@@ -87,13 +87,13 @@ struct diff_score {
short name_score;
};
-struct prefetch_options {
+struct inexact_prefetch_options {
struct repository *repo;
int skip_unmodified;
};
-static void prefetch(void *prefetch_options)
+static void inexact_prefetch(void *prefetch_options)
{
- struct prefetch_options *options = prefetch_options;
+ struct inexact_prefetch_options *options = prefetch_options;
int i;
struct oid_array to_fetch = OID_ARRAY_INIT;
@@ -126,7 +126,7 @@ static int estimate_similarity(struct repository *r,
struct diff_filespec *src,
struct diff_filespec *dst,
int minimum_score,
- int skip_unmodified)
+ struct diff_populate_filespec_options *dpf_opt)
{
/* src points at a file that existed in the original tree (or
* optionally a file in the destination tree) and dst points
@@ -143,15 +143,6 @@ static int estimate_similarity(struct repository *r,
*/
unsigned long max_size, delta_size, base_size, src_copied, literal_added;
int score;
- struct diff_populate_filespec_options dpf_options = {
- .check_size_only = 1
- };
- struct prefetch_options prefetch_options = {r, skip_unmodified};
-
- if (r == the_repository && has_promisor_remote()) {
- dpf_options.missing_object_cb = prefetch;
- dpf_options.missing_object_data = &prefetch_options;
- }
/* We deal only with regular files. Symlink renames are handled
* only when they are exact matches --- in other words, no edits
@@ -169,11 +160,13 @@ static int estimate_similarity(struct repository *r,
* is a possible size - we really should have a flag to
* say whether the size is valid or not!)
*/
+ dpf_opt->check_size_only = 1;
+
if (!src->cnt_data &&
- diff_populate_filespec(r, src, &dpf_options))
+ diff_populate_filespec(r, src, dpf_opt))
return 0;
if (!dst->cnt_data &&
- diff_populate_filespec(r, dst, &dpf_options))
+ diff_populate_filespec(r, dst, dpf_opt))
return 0;
max_size = ((src->size > dst->size) ? src->size : dst->size);
@@ -191,11 +184,11 @@ static int estimate_similarity(struct repository *r,
if (max_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
- dpf_options.check_size_only = 0;
+ dpf_opt->check_size_only = 0;
- if (!src->cnt_data && diff_populate_filespec(r, src, &dpf_options))
+ if (!src->cnt_data && diff_populate_filespec(r, src, dpf_opt))
return 0;
- if (!dst->cnt_data && diff_populate_filespec(r, dst, &dpf_options))
+ if (!dst->cnt_data && diff_populate_filespec(r, dst, dpf_opt))
return 0;
if (diffcore_count_changes(r, src, dst,
@@ -455,9 +448,9 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
const char *oldname,
const char *newname)
{
- char *old_dir = xstrdup(oldname);
- char *new_dir = xstrdup(newname);
- char new_dir_first_char = new_dir[0];
+ char *old_dir;
+ char *new_dir;
+ const char new_dir_first_char = newname[0];
int first_time_in_loop = 1;
if (!info->setup)
@@ -482,6 +475,10 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
*/
return;
+
+ old_dir = xstrdup(oldname);
+ new_dir = xstrdup(newname);
+
while (1) {
int drd_flag = NOT_RELEVANT;
@@ -568,7 +565,8 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
static void initialize_dir_rename_info(struct dir_rename_info *info,
struct strintmap *relevant_sources,
struct strintmap *dirs_removed,
- struct strmap *dir_rename_count)
+ struct strmap *dir_rename_count,
+ struct strmap *cached_pairs)
{
struct hashmap_iter iter;
struct strmap_entry *entry;
@@ -633,6 +631,17 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
rename_dst[i].p->two->path);
}
+ /* Add cached_pairs to counts */
+ strmap_for_each_entry(cached_pairs, &iter, entry) {
+ const char *old_name = entry->key;
+ const char *new_name = entry->value;
+ if (!new_name)
+ /* known delete; ignore it */
+ continue;
+
+ update_dir_rename_counts(info, dirs_removed, old_name, new_name);
+ }
+
/*
* Now we collapse
* dir_rename_count: old_directory -> {new_directory -> count}
@@ -811,6 +820,78 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
return idx;
}
+struct basename_prefetch_options {
+ struct repository *repo;
+ struct strintmap *relevant_sources;
+ struct strintmap *sources;
+ struct strintmap *dests;
+ struct dir_rename_info *info;
+};
+static void basename_prefetch(void *prefetch_options)
+{
+ struct basename_prefetch_options *options = prefetch_options;
+ struct strintmap *relevant_sources = options->relevant_sources;
+ struct strintmap *sources = options->sources;
+ struct strintmap *dests = options->dests;
+ struct dir_rename_info *info = options->info;
+ int i;
+ struct oid_array to_fetch = OID_ARRAY_INIT;
+
+ /*
+ * TODO: The following loops mirror the code/logic from
+ * find_basename_matches(), though not quite exactly. Maybe
+ * abstract the iteration logic out somehow?
+ */
+ for (i = 0; i < rename_src_nr; ++i) {
+ char *filename = rename_src[i].p->one->path;
+ const char *base = NULL;
+ intptr_t src_index;
+ intptr_t dst_index;
+
+ /* Skip irrelevant sources */
+ if (relevant_sources &&
+ !strintmap_contains(relevant_sources, filename))
+ continue;
+
+ /*
+ * If the basename is unique among remaining sources, then
+ * src_index will equal 'i' and we can attempt to match it
+ * to a unique basename in the destinations. Otherwise,
+ * use directory rename heuristics, if possible.
+ */
+ base = get_basename(filename);
+ src_index = strintmap_get(sources, base);
+ assert(src_index == -1 || src_index == i);
+
+ if (strintmap_contains(dests, base)) {
+ struct diff_filespec *one, *two;
+
+ /* Find a matching destination, if possible */
+ dst_index = strintmap_get(dests, base);
+ if (src_index == -1 || dst_index == -1) {
+ src_index = i;
+ dst_index = idx_possible_rename(filename, info);
+ }
+ if (dst_index == -1)
+ continue;
+
+ /* Ignore this dest if already used in a rename */
+ if (rename_dst[dst_index].is_rename)
+ continue; /* already used previously */
+
+ one = rename_src[src_index].p->one;
+ two = rename_dst[dst_index].p->two;
+
+ /* Add the pairs */
+ diff_add_if_missing(options->repo, &to_fetch, two);
+ diff_add_if_missing(options->repo, &to_fetch, one);
+ }
+ }
+
+ promisor_remote_get_direct(options->repo, to_fetch.oid, to_fetch.nr);
+ oid_array_clear(&to_fetch);
+}
+
static int find_basename_matches(struct diff_options *options,
int minimum_score,
struct dir_rename_info *info,
@@ -850,18 +931,18 @@ static int find_basename_matches(struct diff_options *options,
int i, renames = 0;
struct strintmap sources;
struct strintmap dests;
-
- /*
- * The prefeteching stuff wants to know if it can skip prefetching
- * blobs that are unmodified...and will then do a little extra work
- * to verify that the oids are indeed different before prefetching.
- * Unmodified blobs are only relevant when doing copy detection;
- * when limiting to rename detection, diffcore_rename[_extended]()
- * will never be called with unmodified source paths fed to us, so
- * the extra work necessary to check if rename_src entries are
- * unmodified would be a small waste.
- */
- int skip_unmodified = 0;
+ struct diff_populate_filespec_options dpf_options = {
+ .check_binary = 0,
+ .missing_object_cb = NULL,
+ .missing_object_data = NULL
+ };
+ struct basename_prefetch_options prefetch_options = {
+ .repo = options->repo,
+ .relevant_sources = relevant_sources,
+ .sources = &sources,
+ .dests = &dests,
+ .info = info
+ };
/*
* Create maps of basename -> fullname(s) for remaining sources and
@@ -898,6 +979,11 @@ static int find_basename_matches(struct diff_options *options,
strintmap_set(&dests, base, i);
}
+ if (options->repo == the_repository && has_promisor_remote()) {
+ dpf_options.missing_object_cb = basename_prefetch;
+ dpf_options.missing_object_data = &prefetch_options;
+ }
+
/* Now look for basename matchups and do similarity estimation */
for (i = 0; i < rename_src_nr; ++i) {
char *filename = rename_src[i].p->one->path;
@@ -941,7 +1027,7 @@ static int find_basename_matches(struct diff_options *options,
one = rename_src[src_index].p->one;
two = rename_dst[dst_index].p->two;
score = estimate_similarity(options->repo, one, two,
- minimum_score, skip_unmodified);
+ minimum_score, &dpf_options);
/* If sufficiently similar, record as rename pair */
if (score < minimum_score)
@@ -1009,7 +1095,7 @@ static int too_many_rename_candidates(int num_destinations, int num_sources,
* memory for the matrix anyway.
*/
if (rename_limit <= 0)
- rename_limit = 32767;
+ return 0; /* treat as unlimited */
if (st_mult(num_destinations, num_sources)
<= st_mult(rename_limit, rename_limit))
return 0;
@@ -1247,7 +1333,8 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info,
void diffcore_rename_extended(struct diff_options *options,
struct strintmap *relevant_sources,
struct strintmap *dirs_removed,
- struct strmap *dir_rename_count)
+ struct strmap *dir_rename_count,
+ struct strmap *cached_pairs)
{
int detect_rename = options->detect_rename;
int minimum_score = options->rename_score;
@@ -1259,6 +1346,14 @@ void diffcore_rename_extended(struct diff_options *options,
int num_sources, want_copies;
struct progress *progress = NULL;
struct dir_rename_info info;
+ struct diff_populate_filespec_options dpf_options = {
+ .check_binary = 0,
+ .missing_object_cb = NULL,
+ .missing_object_data = NULL
+ };
+ struct inexact_prefetch_options prefetch_options = {
+ .repo = options->repo
+ };
trace2_region_enter("diff", "setup", options->repo);
info.setup = 0;
@@ -1363,7 +1458,8 @@ void diffcore_rename_extended(struct diff_options *options,
/* Preparation for basename-driven matching. */
trace2_region_enter("diff", "dir rename setup", options->repo);
initialize_dir_rename_info(&info, relevant_sources,
- dirs_removed, dir_rename_count);
+ dirs_removed, dir_rename_count,
+ cached_pairs);
trace2_region_leave("diff", "dir rename setup", options->repo);
/* Utilize file basenames to quickly find renames. */
@@ -1419,6 +1515,13 @@ void diffcore_rename_extended(struct diff_options *options,
(uint64_t)num_destinations * (uint64_t)num_sources);
}
+ /* Finish setting up dpf_options */
+ prefetch_options.skip_unmodified = skip_unmodified;
+ if (options->repo == the_repository && has_promisor_remote()) {
+ dpf_options.missing_object_cb = inexact_prefetch;
+ dpf_options.missing_object_data = &prefetch_options;
+ }
+
CALLOC_ARRAY(mx, st_mult(NUM_CANDIDATE_PER_DST, num_destinations));
for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
struct diff_filespec *two = rename_dst[i].p->two;
@@ -1444,7 +1547,7 @@ void diffcore_rename_extended(struct diff_options *options,
this_src.score = estimate_similarity(options->repo,
one, two,
minimum_score,
- skip_unmodified);
+ &dpf_options);
this_src.name_score = basename_same(one, two);
this_src.dst = i;
this_src.src = j;
@@ -1529,7 +1632,7 @@ void diffcore_rename_extended(struct diff_options *options,
/* all the usual ones need to be kept */
diff_q(&outq, p);
else
- /* no need to keep unmodified pairs; FIXME: remove earlier? */
+ /* no need to keep unmodified pairs */
pair_to_free = p;
if (pair_to_free)
@@ -1560,5 +1663,5 @@ void diffcore_rename_extended(struct diff_options *options,
void diffcore_rename(struct diff_options *options)
{
- diffcore_rename_extended(options, NULL, NULL, NULL);
+ diffcore_rename_extended(options, NULL, NULL, NULL, NULL);
}