From ddddf8d7e254f4af6297d0ed62ea6a5d7eabdb64 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:45 +0000
Subject: fast-import: permit reading multiple marks files

In the future, we'll want to read marks files for submodules as well.
Refactor the existing code to make it possible to read multiple marks
files, each into its own mark set.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fast-import.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'fast-import.c')

diff --git a/fast-import.c b/fast-import.c
index b8b65a801c..b9ecd89699 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -493,9 +493,8 @@ static char *pool_strdup(const char *s)
 	return r;
 }
 
-static void insert_mark(uintmax_t idnum, struct object_entry *oe)
+static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe)
 {
-	struct mark_set *s = marks;
 	while ((idnum >> s->shift) >= 1024) {
 		s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
 		s->shift = marks->shift + 10;
@@ -919,7 +918,7 @@ static int store_object(
 
 	e = insert_object(&oid);
 	if (mark)
-		insert_mark(mark, e);
+		insert_mark(marks, mark, e);
 	if (e->idx.offset) {
 		duplicate_count_by_type[type]++;
 		return 1;
@@ -1117,7 +1116,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
 	e = insert_object(&oid);
 
 	if (mark)
-		insert_mark(mark, e);
+		insert_mark(marks, mark, e);
 
 	if (e->idx.offset) {
 		duplicate_count_by_type[OBJ_BLOB]++;
@@ -1712,16 +1711,9 @@ static void dump_marks(void)
 	}
 }
 
-static void read_marks(void)
+static void read_mark_file(struct mark_set *s, FILE *f)
 {
 	char line[512];
-	FILE *f = fopen(import_marks_file, "r");
-	if (f)
-		;
-	else if (import_marks_file_ignore_missing && errno == ENOENT)
-		goto done; /* Marks file does not exist */
-	else
-		die_errno("cannot read '%s'", import_marks_file);
 	while (fgets(line, sizeof(line), f)) {
 		uintmax_t mark;
 		char *end;
@@ -1747,8 +1739,20 @@ static void read_marks(void)
 			e->pack_id = MAX_PACK_ID;
 			e->idx.offset = 1; /* just not zero! */
 		}
-		insert_mark(mark, e);
+		insert_mark(s, mark, e);
 	}
+}
+
+static void read_marks(void)
+{
+	FILE *f = fopen(import_marks_file, "r");
+	if (f)
+		;
+	else if (import_marks_file_ignore_missing && errno == ENOENT)
+		goto done; /* Marks file does not exist */
+	else
+		die_errno("cannot read '%s'", import_marks_file);
+	read_mark_file(marks, f);
 	fclose(f);
 done:
 	import_marks_file_done = 1;
@@ -3130,7 +3134,7 @@ static void parse_alias(void)
 		die(_("Expected 'to' command, got %s"), command_buf.buf);
 	e = find_object(&b.oid);
 	assert(e);
-	insert_mark(next_mark, e);
+	insert_mark(marks, next_mark, e);
 }
 
 static char* make_fast_import_path(const char *path)
-- 
cgit v1.2.3


From abe0cc536414f2b9cfa37f208b36df5126e6356a Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:46 +0000
Subject: fast-import: add helper function for inserting mark object entries

Currently, everything we want to insert into a mark set is an object
entry. However, in the future, we will want to insert objects of other
types. Teach read_mark_file to take a function pointer which helps us
insert the object we want into our mark set.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fast-import.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

(limited to 'fast-import.c')

diff --git a/fast-import.c b/fast-import.c
index b9ecd89699..3ce4a04473 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -131,6 +131,8 @@ struct recent_command {
 	char *buf;
 };
 
+typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+
 /* Configured limits on output */
 static unsigned long max_depth = 50;
 static off_t max_packsize;
@@ -1711,14 +1713,37 @@ static void dump_marks(void)
 	}
 }
 
-static void read_mark_file(struct mark_set *s, FILE *f)
+/*
+ * mark_set_inserter_t callback that records an object entry under "mark"
+ * in the mark set "s".
+ *
+ * Reuses the entry find_object() already knows for "oid"; otherwise asks
+ * oid_object_info() for the object's type (dying if that fails) and
+ * registers a new entry with insert_object() before inserting the mark.
+ */
+static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+{
+	struct object_entry *e;
+	e = find_object(oid);
+	if (!e) {
+		enum object_type type = oid_object_info(the_repository,
+							oid, NULL);
+		if (type < 0)
+			die("object not found: %s", oid_to_hex(oid));
+		e = insert_object(oid);
+		e->type = type;
+		e->pack_id = MAX_PACK_ID;
+		e->idx.offset = 1; /* just not zero! */
+	}
+	insert_mark(s, mark, e);
+}
+
+static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
 {
 	char line[512];
 	while (fgets(line, sizeof(line), f)) {
 		uintmax_t mark;
 		char *end;
 		struct object_id oid;
-		struct object_entry *e;
 
 		end = strchr(line, '\n');
 		if (line[0] != ':' || !end)
@@ -1728,18 +1746,7 @@ static void read_mark_file(struct mark_set *s, FILE *f)
 		if (!mark || end == line + 1
 			|| *end != ' ' || get_oid_hex(end + 1, &oid))
 			die("corrupt mark line: %s", line);
-		e = find_object(&oid);
-		if (!e) {
-			enum object_type type = oid_object_info(the_repository,
-								&oid, NULL);
-			if (type < 0)
-				die("object not found: %s", oid_to_hex(&oid));
-			e = insert_object(&oid);
-			e->type = type;
-			e->pack_id = MAX_PACK_ID;
-			e->idx.offset = 1; /* just not zero! */
-		}
-		insert_mark(s, mark, e);
+		inserter(s, &oid, mark);
 	}
 }
 
@@ -1752,7 +1759,7 @@ static void read_marks(void)
 		goto done; /* Marks file does not exist */
 	else
 		die_errno("cannot read '%s'", import_marks_file);
-	read_mark_file(marks, f);
+	read_mark_file(marks, f, insert_object_entry);
 	fclose(f);
 done:
 	import_marks_file_done = 1;
-- 
cgit v1.2.3


From 11d8ef3ee7ab8cfe56a4276ea4b62794d23293e3 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:47 +0000
Subject: fast-import: make find_mark work on any mark set

In the future, we'll use multiple different mark sets with this
function, so make it take an argument that points to the mark set to
operate on.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fast-import.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

(limited to 'fast-import.c')

diff --git a/fast-import.c b/fast-import.c
index 3ce4a04473..8aaa7f6289 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -517,10 +517,9 @@ static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry
 	s->data.marked[idnum] = oe;
 }
 
-static struct object_entry *find_mark(uintmax_t idnum)
+static void *find_mark(struct mark_set *s, uintmax_t idnum)
 {
 	uintmax_t orig_idnum = idnum;
-	struct mark_set *s = marks;
 	struct object_entry *oe = NULL;
 	if ((idnum >> s->shift) < 1024) {
 		while (s && s->shift) {
@@ -2225,7 +2224,7 @@ static void file_change_m(const char *p, struct branch *b)
 	}
 
 	if (*p == ':') {
-		oe = find_mark(parse_mark_ref_space(&p));
+		oe = find_mark(marks, parse_mark_ref_space(&p));
 		oidcpy(&oid, &oe->idx.oid);
 	} else if (skip_prefix(p, "inline ", &p)) {
 		inline_data = 1;
@@ -2399,7 +2398,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 	/* Now parse the notemodify command. */
 	/* <dataref> or 'inline' */
 	if (*p == ':') {
-		oe = find_mark(parse_mark_ref_space(&p));
+		oe = find_mark(marks, parse_mark_ref_space(&p));
 		oidcpy(&oid, &oe->idx.oid);
 	} else if (skip_prefix(p, "inline ", &p)) {
 		inline_data = 1;
@@ -2420,7 +2419,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 		oidcpy(&commit_oid, &s->oid);
 	} else if (*p == ':') {
 		uintmax_t commit_mark = parse_mark_ref_eol(p);
-		struct object_entry *commit_oe = find_mark(commit_mark);
+		struct object_entry *commit_oe = find_mark(marks, commit_mark);
 		if (commit_oe->type != OBJ_COMMIT)
 			die("Mark :%" PRIuMAX " not a commit", commit_mark);
 		oidcpy(&commit_oid, &commit_oe->idx.oid);
@@ -2524,7 +2523,7 @@ static int parse_objectish(struct branch *b, const char *objectish)
 		oidcpy(&b->branch_tree.versions[1].oid, t);
 	} else if (*objectish == ':') {
 		uintmax_t idnum = parse_mark_ref_eol(objectish);
-		struct object_entry *oe = find_mark(idnum);
+		struct object_entry *oe = find_mark(marks, idnum);
 		if (oe->type != OBJ_COMMIT)
 			die("Mark :%" PRIuMAX " not a commit", idnum);
 		if (!oideq(&b->oid, &oe->idx.oid)) {
@@ -2588,7 +2587,7 @@ static struct hash_list *parse_merge(unsigned int *count)
 			oidcpy(&n->oid, &s->oid);
 		else if (*from == ':') {
 			uintmax_t idnum = parse_mark_ref_eol(from);
-			struct object_entry *oe = find_mark(idnum);
+			struct object_entry *oe = find_mark(marks, idnum);
 			if (oe->type != OBJ_COMMIT)
 				die("Mark :%" PRIuMAX " not a commit", idnum);
 			oidcpy(&n->oid, &oe->idx.oid);
@@ -2762,7 +2761,7 @@ static void parse_new_tag(const char *arg)
 	} else if (*from == ':') {
 		struct object_entry *oe;
 		from_mark = parse_mark_ref_eol(from);
-		oe = find_mark(from_mark);
+		oe = find_mark(marks, from_mark);
 		type = oe->type;
 		oidcpy(&oid, &oe->idx.oid);
 	} else if (!get_oid(from, &oid)) {
@@ -2920,7 +2919,7 @@ static void parse_get_mark(const char *p)
 	if (*p != ':')
 		die("Not a mark: %s", p);
 
-	oe = find_mark(parse_mark_ref_eol(p));
+	oe = find_mark(marks, parse_mark_ref_eol(p));
 	if (!oe)
 		die("Unknown mark: %s", command_buf.buf);
 
@@ -2935,7 +2934,7 @@ static void parse_cat_blob(const char *p)
 
 	/* cat-blob SP <object> LF */
 	if (*p == ':') {
-		oe = find_mark(parse_mark_ref_eol(p));
+		oe = find_mark(marks, parse_mark_ref_eol(p));
 		if (!oe)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &oe->idx.oid);
@@ -3010,7 +3009,7 @@ static struct object_entry *parse_treeish_dataref(const char **p)
 	struct object_entry *e;
 
 	if (**p == ':') {	/* <mark> */
-		e = find_mark(parse_mark_ref_space(p));
+		e = find_mark(marks, parse_mark_ref_space(p));
 		if (!e)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &e->idx.oid);
-- 
cgit v1.2.3


From d9db599ca85b008faedc1d5e771e28b6fb648c4e Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:48 +0000
Subject: fast-import: add a generic function to iterate over marks

Currently, we can iterate over marks only to dump them to a file. In the
future, we'll want to perform an arbitrary operation over the items of a
mark set. Add a function, for_each_mark, that iterates over marks in a
set and performs an arbitrary callback function for each mark. Switch
the mark dumping routine to use this function now that it's available.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fast-import.c | 50 ++++++++++++++++++++++++++------------------------
 1 file changed, 26 insertions(+), 24 deletions(-)

(limited to 'fast-import.c')

diff --git a/fast-import.c b/fast-import.c
index 8aaa7f6289..6711f71ba7 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -132,6 +132,7 @@ struct recent_command {
 };
 
 typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp);
 
 /* Configured limits on output */
 static unsigned long max_depth = 50;
@@ -232,6 +233,46 @@ static void parse_get_mark(const char *p);
 static void parse_cat_blob(const char *p);
 static void parse_ls(const char *p, struct branch *b);
 
+/*
+ * Walk the mark tree rooted at "m" and invoke "callback" once for every
+ * populated slot, passing the mark number, the stored pointer and the
+ * caller-supplied "p".  "base" is the mark number corresponding to the
+ * first slot of "m"; the callers in this file start at 0.
+ */
+static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p)
+{
+	uintmax_t i;
+
+	if (!m->shift) {
+		/* Leaf: the slots hold the marked pointers themselves. */
+		for (i = 0; i < 1024; i++) {
+			if (m->data.marked[i])
+				callback(base + i, m->data.marked[i], p);
+		}
+		return;
+	}
+
+	/* Interior node: slot i covers marks starting at base + (i << shift). */
+	for (i = 0; i < 1024; i++) {
+		struct mark_set *child = m->data.sets[i];
+
+		if (child)
+			for_each_mark(child, base + (i << m->shift), callback, p);
+	}
+}
+
+/*
+ * each_mark_fn_t callback that writes one ":<mark> <hex object ID>" line
+ * for the object entry "object" to the FILE passed in "cbp".
+ */
+static void dump_marks_fn(uintmax_t mark, void *object, void *cbp)
+{
+	struct object_entry *entry = object;
+	FILE *out = cbp;
+
+	fprintf(out, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&entry->idx.oid));
+}
+
 static void write_branch_report(FILE *rpt, struct branch *b)
 {
 	fprintf(rpt, "%s:\n", b->name);
@@ -260,8 +284,6 @@ static void write_branch_report(FILE *rpt, struct branch *b)
 	fputc('\n', rpt);
 }
 
-static void dump_marks_helper(FILE *, uintmax_t, struct mark_set *);
-
 static void write_crash_report(const char *err)
 {
 	char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid());
@@ -340,7 +362,7 @@ static void write_crash_report(const char *err)
 	if (export_marks_file)
 		fprintf(rpt, "  exported to %s\n", export_marks_file);
 	else
-		dump_marks_helper(rpt, 0, marks);
+		for_each_mark(marks, 0, dump_marks_fn, rpt);
 
 	fputc('\n', rpt);
 	fputs("-------------------\n", rpt);
@@ -1655,26 +1677,6 @@ static void dump_tags(void)
 	strbuf_release(&err);
 }
 
-static void dump_marks_helper(FILE *f,
-	uintmax_t base,
-	struct mark_set *m)
-{
-	uintmax_t k;
-	if (m->shift) {
-		for (k = 0; k < 1024; k++) {
-			if (m->data.sets[k])
-				dump_marks_helper(f, base + (k << m->shift),
-					m->data.sets[k]);
-		}
-	} else {
-		for (k = 0; k < 1024; k++) {
-			if (m->data.marked[k])
-				fprintf(f, ":%" PRIuMAX " %s\n", base + k,
-					oid_to_hex(&m->data.marked[k]->idx.oid));
-		}
-	}
-}
-
 static void dump_marks(void)
 {
 	struct lock_file mark_lock = LOCK_INIT;
@@ -1704,7 +1706,7 @@ static void dump_marks(void)
 		return;
 	}
 
-	dump_marks_helper(f, 0, marks);
+	for_each_mark(marks, 0, dump_marks_fn, f);
 	if (commit_lock_file(&mark_lock)) {
 		failure |= error_errno("Unable to write file %s",
 				       export_marks_file);
-- 
cgit v1.2.3


From 1bdca816412910e1206c15ef47f2a8a6b369b831 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:49 +0000
Subject: fast-import: add options for rewriting submodules

When converting a repository using submodules from one hash algorithm to
another, it is necessary to rewrite the submodules from the old
algorithm to the new algorithm, since only references to submodules, not
their contents, are written to the fast-export stream. Without rewriting
the submodules, fast-import fails with an "Invalid dataref" error when
encountering a submodule in another algorithm.

Add a pair of options, --rewrite-submodules-from and
--rewrite-submodules-to, that take a list of marks produced by
fast-export and fast-import, respectively, when processing the
submodule. Use these marks to map the submodule commits from the old
algorithm to the new algorithm.

We read marks into two corresponding struct mark_set objects and then
perform a mapping from the old to the new using a hash table. This lets
us reuse the same mark parsing code that is used elsewhere and allows us
to efficiently read and match marks based on their ID, since mark files
need not be sorted.

Note that because we're using a khash table for the object IDs, and this
table copies values of struct object_id instead of taking references to
them, it's necessary to zero the struct object_id values that we use to
insert and look up in the table. Otherwise, we would end up with SHA-1
values that don't match because of whatever stack garbage might be left
in the unused area.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 fast-import.c | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 107 insertions(+), 5 deletions(-)

(limited to 'fast-import.c')

diff --git a/fast-import.c b/fast-import.c
index 6711f71ba7..202dda11a6 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -18,6 +18,7 @@
 #include "object-store.h"
 #include "mem-pool.h"
 #include "commit-reach.h"
+#include "khash.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -53,6 +54,7 @@ struct object_entry_pool {
 
 struct mark_set {
 	union {
+		struct object_id *oids[1024];
 		struct object_entry *marked[1024];
 		struct mark_set *sets[1024];
 	} data;
@@ -225,6 +227,11 @@ static int allow_unsafe_features;
 /* Signal handling */
 static volatile sig_atomic_t checkpoint_requested;
 
+/* Submodule marks */
+static struct string_list sub_marks_from = STRING_LIST_INIT_DUP;
+static struct string_list sub_marks_to = STRING_LIST_INIT_DUP;
+static kh_oid_map_t *sub_oid_map;
+
 /* Where to write output of cat-blob commands */
 static int cat_blob_fd = STDOUT_FILENO;
 
@@ -1731,6 +1738,17 @@ static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintm
 	insert_mark(s, mark, e);
 }
 
+/*
+ * mark_set_inserter_t callback that stores a copy of "oid", made with
+ * xmemdupz(), under "mark" in "s" instead of an object entry.
+ */
+static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+{
+	void *oid_copy = xmemdupz(oid, sizeof(*oid));
+
+	insert_mark(s, mark, oid_copy);
+}
+
 static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
 {
 	char line[512];
@@ -1739,13 +1751,17 @@ static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inse
 		char *end;
 		struct object_id oid;
 
+		/* Ensure SHA-1 objects are padded with zeros. */
+		memset(oid.hash, 0, sizeof(oid.hash));
+
 		end = strchr(line, '\n');
 		if (line[0] != ':' || !end)
 			die("corrupt mark line: %s", line);
 		*end = 0;
 		mark = strtoumax(line + 1, &end, 10);
 		if (!mark || end == line + 1
-			|| *end != ' ' || get_oid_hex(end + 1, &oid))
+			|| *end != ' '
+			|| get_oid_hex_any(end + 1, &oid) == GIT_HASH_UNKNOWN)
 			die("corrupt mark line: %s", line);
 		inserter(s, &oid, mark);
 	}
@@ -2146,6 +2162,40 @@ static uintmax_t change_note_fanout(struct tree_entry *root,
 	return do_change_note_fanout(root, root, hex_oid, 0, path, 0, fanout);
 }
 
+/*
+ * Parse a hex object ID at "hex" into "oid", leaving "*end" as set by
+ * parse_oid_hex_any(), and translate it through sub_oid_map.
+ *
+ * Returns 0 when the ID was found in the map (and "oid" was replaced by
+ * the mapped value) or when it is unmapped but already uses the
+ * repository's hash algorithm; returns -1 when it cannot be parsed or is
+ * an unmapped ID in a different algorithm.
+ */
+static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end)
+{
+	khiter_t pos;
+	int algo;
+
+	/* Zero the whole hash first so that SHA-1 IDs end up zero-padded. */
+	memset(oid->hash, 0, sizeof(oid->hash));
+
+	algo = parse_oid_hex_any(hex, oid, end);
+	if (algo == GIT_HASH_UNKNOWN)
+		return -1;
+
+	pos = kh_get_oid_map(sub_oid_map, *oid);
+	if (pos != kh_end(sub_oid_map)) {
+		/* Present in the map: substitute the mapped object ID. */
+		oidcpy(oid, kh_value(sub_oid_map, pos));
+		return 0;
+	}
+
+	/* Not in the map: only pass it through if the algorithm matches. */
+	if (hash_algos[algo].format_id != the_hash_algo->format_id)
+		return -1;
+	return 0;
+}
+
 /*
  * Given a pointer into a string, parse a mark reference:
  *
@@ -2232,7 +2272,7 @@ static void file_change_m(const char *p, struct branch *b)
 		inline_data = 1;
 		oe = NULL; /* not used with inline_data, but makes gcc happy */
 	} else {
-		if (parse_oid_hex(p, &oid, &p))
+		if (parse_mapped_oid_hex(p, &oid, &p))
 			die("Invalid dataref: %s", command_buf.buf);
 		oe = find_object(&oid);
 		if (*p++ != ' ')
@@ -2406,7 +2446,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 		inline_data = 1;
 		oe = NULL; /* not used with inline_data, but makes gcc happy */
 	} else {
-		if (parse_oid_hex(p, &oid, &p))
+		if (parse_mapped_oid_hex(p, &oid, &p))
 			die("Invalid dataref: %s", command_buf.buf);
 		oe = find_object(&oid);
 		if (*p++ != ' ')
@@ -2941,7 +2981,7 @@ static void parse_cat_blob(const char *p)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &oe->idx.oid);
 	} else {
-		if (parse_oid_hex(p, &oid, &p))
+		if (parse_mapped_oid_hex(p, &oid, &p))
 			die("Invalid dataref: %s", command_buf.buf);
 		if (*p)
 			die("Garbage after SHA1: %s", command_buf.buf);
@@ -3005,6 +3045,53 @@ static struct object_entry *dereference(struct object_entry *oe,
 	return find_object(oid);
 }
 
+/*
+ * each_mark_fn_t callback used while building sub_oid_map.  "object" is the
+ * object ID stored under "mark" in the "from" set and "cbp" is the matching
+ * "to" mark set; the ID found under the same mark there becomes the mapped
+ * value.  The first mapping recorded for a given "from" ID wins.
+ */
+static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp)
+{
+	struct object_id *from_oid = object;
+	struct mark_set *to_marks = cbp;
+	struct object_id *to_oid = find_mark(to_marks, mark);
+	int is_new;
+	khiter_t pos = kh_put_oid_map(sub_oid_map, *from_oid, &is_new);
+
+	/* kh_put reports 0 when the key was already present. */
+	if (is_new)
+		kh_value(sub_oid_map, pos) = to_oid;
+}
+
+/* Map every mark in "from" to the object ID under the same mark in "to". */
+static void build_mark_map_one(struct mark_set *from, struct mark_set *to)
+{
+	for_each_mark(from, 0, insert_mapped_mark, to);
+}
+
+/*
+ * Create sub_oid_map and fill it from every submodule named in "from",
+ * pairing each one with the entry of the same name in "to".  Dies if either
+ * side lacks a mark set for a submodule listed in "from".
+ */
+static void build_mark_map(struct string_list *from, struct string_list *to)
+{
+	struct string_list_item *src;
+
+	sub_oid_map = kh_init_oid_map();
+
+	for_each_string_list_item(src, from) {
+		struct string_list_item *dst = string_list_lookup(to, src->string);
+
+		if (!src->util)
+			die(_("Missing from marks for submodule '%s'"), src->string);
+		if (!dst || !dst->util)
+			die(_("Missing to marks for submodule '%s'"), src->string);
+		build_mark_map_one(src->util, dst->util);
+	}
+}
+
 static struct object_entry *parse_treeish_dataref(const char **p)
 {
 	struct object_id oid;
@@ -3016,7 +3092,7 @@ static struct object_entry *parse_treeish_dataref(const char **p)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &e->idx.oid);
 	} else {	/* <sha1> */
-		if (parse_oid_hex(*p, &oid, p))
+		if (parse_mapped_oid_hex(*p, &oid, p))
 			die("Invalid dataref: %s", command_buf.buf);
 		e = find_object(&oid);
 		if (*(*p)++ != ' ')
@@ -3222,6 +3298,38 @@ static void option_export_pack_edges(const char *edges)
 	pack_edges = xfopen(edges, "a");
 }
 
+/*
+ * Parse a "<name>:<filename>" argument, read the marks file into a fresh
+ * mark set of object IDs, and register that set in "list" under the
+ * submodule name.  Dies if the argument has no ':' or the file cannot be
+ * opened.
+ */
+static void option_rewrite_submodules(const char *arg, struct string_list *list)
+{
+	struct mark_set *ms;
+	FILE *fp;
+	char *s = xstrdup(arg);
+	char *f = strchr(s, ':');
+	if (!f)
+		die(_("Expected format name:filename for submodule rewrite option"));
+	*f = '\0';
+	f++;
+	ms = xcalloc(1, sizeof(*ms));
+	string_list_insert(list, s)->util = ms;
+
+	fp = fopen(f, "r");
+	if (!fp)
+		die_errno("cannot read '%s'", f);
+	read_mark_file(ms, fp, insert_oid_entry);
+	fclose(fp);
+
+	/*
+	 * Both lists are STRING_LIST_INIT_DUP, so the list owns its own copy
+	 * of the name; "f" pointed into "s" and is no longer needed either.
+	 */
+	free(s);
+}
+
 static int parse_one_option(const char *option)
 {
 	if (skip_prefix(option, "max-pack-size=", &option)) {
@@ -3284,6 +3380,10 @@ static int parse_one_feature(const char *feature, int from_stream)
 		option_export_marks(arg);
 	} else if (!strcmp(feature, "alias")) {
 		; /* Don't die - this feature is supported */
+	} else if (skip_prefix(feature, "rewrite-submodules-to=", &arg)) {
+		option_rewrite_submodules(arg, &sub_marks_to);
+	} else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
+		option_rewrite_submodules(arg, &sub_marks_from);
 	} else if (!strcmp(feature, "get-mark")) {
 		; /* Don't die - this feature is supported */
 	} else if (!strcmp(feature, "cat-blob")) {
@@ -3389,6 +3490,7 @@ static void parse_argv(void)
 	seen_data_command = 1;
 	if (import_marks_file)
 		read_marks();
+	build_mark_map(&sub_marks_from, &sub_marks_to);
 }
 
 int cmd_main(int argc, const char **argv)
-- 
cgit v1.2.3