summaryrefslogtreecommitdiff
path: root/fast-import.c
diff options
context:
space:
mode:
authorLibravatar Junio C Hamano <gitster@pobox.com>2020-03-26 17:11:20 -0700
committerLibravatar Junio C Hamano <gitster@pobox.com>2020-03-26 17:11:20 -0700
commitf8cb64e3d4d512a86c1b7b3aa584f11740b3d038 (patch)
treea679837356ebc7f598a0073a33ee0aea43edcd1e /fast-import.c
parentMerge branch 'pb/recurse-submodules-fix' (diff)
parentfast-import: add options for rewriting submodules (diff)
downloadtgif-f8cb64e3d4d512a86c1b7b3aa584f11740b3d038.tar.xz
Merge branch 'bc/sha-256-part-1-of-4'
SHA-256 transition continues. * bc/sha-256-part-1-of-4: (22 commits) fast-import: add options for rewriting submodules fast-import: add a generic function to iterate over marks fast-import: make find_marks work on any mark set fast-import: add helper function for inserting mark object entries fast-import: permit reading multiple marks files commit: use expected signature header for SHA-256 worktree: allow repository version 1 init-db: move writing repo version into a function builtin/init-db: add environment variable for new repo hash builtin/init-db: allow specifying hash algorithm on command line setup: allow check_repository_format to read repository format t/helper: make repository tests hash independent t/helper: initialize repository if necessary t/helper/test-dump-split-index: initialize git repository t6300: make hash algorithm independent t6300: abstract away SHA-1-specific constants t: use hash-specific lookup tables to define test constants repository: require a build flag to use SHA-256 hex: add functions to parse hex object IDs in any algorithm hex: introduce parsing variants taking hash algorithms ...
Diffstat (limited to 'fast-import.c')
-rw-r--r--fast-import.c246
1 files changed, 180 insertions, 66 deletions
diff --git a/fast-import.c b/fast-import.c
index b8b65a801c..202dda11a6 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -18,6 +18,7 @@
#include "object-store.h"
#include "mem-pool.h"
#include "commit-reach.h"
+#include "khash.h"
#define PACK_ID_BITS 16
#define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -53,6 +54,7 @@ struct object_entry_pool {
struct mark_set {
union {
+ struct object_id *oids[1024];
struct object_entry *marked[1024];
struct mark_set *sets[1024];
} data;
@@ -131,6 +133,9 @@ struct recent_command {
char *buf;
};
+/* Stores one parsed mark line: read_mark_file() calls it with the target set, the parsed object ID and the mark number. */
+typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+/* Visitor for for_each_mark(): receives the mark number, the pointer stored in that slot, and the caller-supplied cookie. */
+typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp);
+
/* Configured limits on output */
static unsigned long max_depth = 50;
static off_t max_packsize;
@@ -222,6 +227,11 @@ static int allow_unsafe_features;
/* Signal handling */
static volatile sig_atomic_t checkpoint_requested;
+/*
+ * Submodule marks.
+ *
+ * sub_marks_from / sub_marks_to: filled by the rewrite-submodules-from= and
+ * rewrite-submodules-to= features; each item is keyed by the name part of
+ * "name:filename" and its util points to the mark_set read from that file.
+ *
+ * sub_oid_map: created by build_mark_map(); maps an object ID found in a
+ * "from" set to the object ID stored under the same mark number in the
+ * matching "to" set.
+ */
+static struct string_list sub_marks_from = STRING_LIST_INIT_DUP;
+static struct string_list sub_marks_to = STRING_LIST_INIT_DUP;
+static kh_oid_map_t *sub_oid_map;
+
/* Where to write output of cat-blob commands */
static int cat_blob_fd = STDOUT_FILENO;
@@ -230,6 +240,29 @@ static void parse_get_mark(const char *p);
static void parse_cat_blob(const char *p);
static void parse_ls(const char *p, struct branch *b);
+/*
+ * Visit every populated slot reachable from mark set `m`, calling
+ * `callback(mark, object, p)` for each one.  `base` is the mark number
+ * that slot 0 of `m` stands for; callers starting at a root set pass 0.
+ */
+static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p)
+{
+	uintmax_t slot;
+
+	if (!m->shift) {
+		/* Leaf level: every non-NULL slot holds a marked object. */
+		for (slot = 0; slot < 1024; slot++) {
+			void *obj = m->data.marked[slot];
+			if (obj)
+				callback(base + slot, obj, p);
+		}
+		return;
+	}
+
+	/* Interior level: a non-NULL slot is a child set starting at base + (slot << shift). */
+	for (slot = 0; slot < 1024; slot++) {
+		struct mark_set *child = m->data.sets[slot];
+		if (child)
+			for_each_mark(child, base + (slot << m->shift), callback, p);
+	}
+}
+
+/*
+ * each_mark_fn_t callback: write one ":<mark> <hex object id>" line for the
+ * object_entry `object` to the FILE passed as `cbp`.
+ */
+static void dump_marks_fn(uintmax_t mark, void *object, void *cbp)
+{
+	FILE *out = cbp;
+	struct object_entry *entry = object;
+
+	fprintf(out, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&entry->idx.oid));
+}
+
static void write_branch_report(FILE *rpt, struct branch *b)
{
fprintf(rpt, "%s:\n", b->name);
@@ -258,8 +291,6 @@ static void write_branch_report(FILE *rpt, struct branch *b)
fputc('\n', rpt);
}
-static void dump_marks_helper(FILE *, uintmax_t, struct mark_set *);
-
static void write_crash_report(const char *err)
{
char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid());
@@ -338,7 +369,7 @@ static void write_crash_report(const char *err)
if (export_marks_file)
fprintf(rpt, " exported to %s\n", export_marks_file);
else
- dump_marks_helper(rpt, 0, marks);
+ for_each_mark(marks, 0, dump_marks_fn, rpt);
fputc('\n', rpt);
fputs("-------------------\n", rpt);
@@ -493,9 +524,8 @@ static char *pool_strdup(const char *s)
return r;
}
-static void insert_mark(uintmax_t idnum, struct object_entry *oe)
+static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe)
{
- struct mark_set *s = marks;
while ((idnum >> s->shift) >= 1024) {
s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
s->shift = marks->shift + 10;
@@ -516,10 +546,9 @@ static void insert_mark(uintmax_t idnum, struct object_entry *oe)
s->data.marked[idnum] = oe;
}
-static struct object_entry *find_mark(uintmax_t idnum)
+static void *find_mark(struct mark_set *s, uintmax_t idnum)
{
uintmax_t orig_idnum = idnum;
- struct mark_set *s = marks;
struct object_entry *oe = NULL;
if ((idnum >> s->shift) < 1024) {
while (s && s->shift) {
@@ -919,7 +948,7 @@ static int store_object(
e = insert_object(&oid);
if (mark)
- insert_mark(mark, e);
+ insert_mark(marks, mark, e);
if (e->idx.offset) {
duplicate_count_by_type[type]++;
return 1;
@@ -1117,7 +1146,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
e = insert_object(&oid);
if (mark)
- insert_mark(mark, e);
+ insert_mark(marks, mark, e);
if (e->idx.offset) {
duplicate_count_by_type[OBJ_BLOB]++;
@@ -1655,26 +1684,6 @@ static void dump_tags(void)
strbuf_release(&err);
}
-static void dump_marks_helper(FILE *f,
- uintmax_t base,
- struct mark_set *m)
-{
- uintmax_t k;
- if (m->shift) {
- for (k = 0; k < 1024; k++) {
- if (m->data.sets[k])
- dump_marks_helper(f, base + (k << m->shift),
- m->data.sets[k]);
- }
- } else {
- for (k = 0; k < 1024; k++) {
- if (m->data.marked[k])
- fprintf(f, ":%" PRIuMAX " %s\n", base + k,
- oid_to_hex(&m->data.marked[k]->idx.oid));
- }
- }
-}
-
static void dump_marks(void)
{
struct lock_file mark_lock = LOCK_INIT;
@@ -1704,7 +1713,7 @@ static void dump_marks(void)
return;
}
- dump_marks_helper(f, 0, marks);
+ for_each_mark(marks, 0, dump_marks_fn, f);
if (commit_lock_file(&mark_lock)) {
failure |= error_errno("Unable to write file %s",
export_marks_file);
@@ -1712,21 +1721,38 @@ static void dump_marks(void)
}
}
-static void read_marks(void)
+/*
+ * mark_set_inserter_t implementation: record `mark` in set `s` as pointing
+ * at the object_entry for `oid`.
+ *
+ * If find_object() does not know `oid`, its type is looked up with
+ * oid_object_info(); a negative result is fatal.  The entry created for it
+ * gets pack_id MAX_PACK_ID and a non-zero offset (store_object() treats an
+ * entry with a non-zero offset as a duplicate that is already stored).
+ */
+static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+{
+	struct object_entry *entry = find_object(oid);
+
+	if (!entry) {
+		enum object_type kind = oid_object_info(the_repository, oid, NULL);
+
+		if (kind < 0)
+			die("object not found: %s", oid_to_hex(oid));
+		entry = insert_object(oid);
+		entry->pack_id = MAX_PACK_ID;
+		entry->type = kind;
+		entry->idx.offset = 1; /* just not zero! */
+	}
+	insert_mark(s, mark, entry);
+}
+
+/*
+ * mark_set_inserter_t implementation: record `mark` in set `s` as pointing
+ * at a private heap copy of `*oid` (the caller's object ID is not retained).
+ */
+static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+{
+	void *copy = xmemdupz(oid, sizeof(*oid));
+
+	insert_mark(s, mark, copy);
+}
+
+static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
{
char line[512];
- FILE *f = fopen(import_marks_file, "r");
- if (f)
- ;
- else if (import_marks_file_ignore_missing && errno == ENOENT)
- goto done; /* Marks file does not exist */
- else
- die_errno("cannot read '%s'", import_marks_file);
while (fgets(line, sizeof(line), f)) {
uintmax_t mark;
char *end;
struct object_id oid;
- struct object_entry *e;
+
+ /* Ensure SHA-1 objects are padded with zeros. */
+ memset(oid.hash, 0, sizeof(oid.hash));
end = strchr(line, '\n');
if (line[0] != ':' || !end)
@@ -1734,21 +1760,23 @@ static void read_marks(void)
*end = 0;
mark = strtoumax(line + 1, &end, 10);
if (!mark || end == line + 1
- || *end != ' ' || get_oid_hex(end + 1, &oid))
+ || *end != ' '
+ || get_oid_hex_any(end + 1, &oid) == GIT_HASH_UNKNOWN)
die("corrupt mark line: %s", line);
- e = find_object(&oid);
- if (!e) {
- enum object_type type = oid_object_info(the_repository,
- &oid, NULL);
- if (type < 0)
- die("object not found: %s", oid_to_hex(&oid));
- e = insert_object(&oid);
- e->type = type;
- e->pack_id = MAX_PACK_ID;
- e->idx.offset = 1; /* just not zero! */
- }
- insert_mark(mark, e);
+ inserter(s, &oid, mark);
}
+}
+
+static void read_marks(void)
+{
+ FILE *f = fopen(import_marks_file, "r");
+ if (f)
+ ;
+ else if (import_marks_file_ignore_missing && errno == ENOENT)
+ goto done; /* Marks file does not exist */
+ else
+ die_errno("cannot read '%s'", import_marks_file);
+ read_mark_file(marks, f, insert_object_entry);
fclose(f);
done:
import_marks_file_done = 1;
@@ -2134,6 +2162,30 @@ static uintmax_t change_note_fanout(struct tree_entry *root,
return do_change_note_fanout(root, root, hex_oid, 0, path, 0, fanout);
}
+/*
+ * Parse a hex object ID at `hex` in any hash algorithm accepted by
+ * parse_oid_hex_any(), then translate it through sub_oid_map.
+ *
+ * On success returns 0, with `*oid` holding either the mapped object ID or,
+ * when the ID is not in the map but was written in the repository's own
+ * hash algorithm, the parsed ID unchanged.  Returns -1 if the hex string
+ * cannot be parsed, or if it is unmapped and uses a different algorithm.
+ * `*end` is set by parse_oid_hex_any().
+ */
+static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end)
+{
+	khiter_t pos;
+	int algo;
+
+	/* Zero first so a shorter (SHA-1) ID ends up with all-zero padding. */
+	memset(oid->hash, 0, sizeof(oid->hash));
+
+	algo = parse_oid_hex_any(hex, oid, end);
+	if (algo == GIT_HASH_UNKNOWN)
+		return -1;
+
+	pos = kh_get_oid_map(sub_oid_map, *oid);
+	if (pos != kh_end(sub_oid_map)) {
+		/* Known submodule object: substitute its rewritten ID. */
+		oidcpy(oid, kh_value(sub_oid_map, pos));
+		return 0;
+	}
+
+	/* Unmapped: only acceptable when no algorithm conversion is needed. */
+	return hash_algos[algo].format_id == the_hash_algo->format_id ? 0 : -1;
+}
+
/*
* Given a pointer into a string, parse a mark reference:
*
@@ -2214,13 +2266,13 @@ static void file_change_m(const char *p, struct branch *b)
}
if (*p == ':') {
- oe = find_mark(parse_mark_ref_space(&p));
+ oe = find_mark(marks, parse_mark_ref_space(&p));
oidcpy(&oid, &oe->idx.oid);
} else if (skip_prefix(p, "inline ", &p)) {
inline_data = 1;
oe = NULL; /* not used with inline_data, but makes gcc happy */
} else {
- if (parse_oid_hex(p, &oid, &p))
+ if (parse_mapped_oid_hex(p, &oid, &p))
die("Invalid dataref: %s", command_buf.buf);
oe = find_object(&oid);
if (*p++ != ' ')
@@ -2388,13 +2440,13 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
/* Now parse the notemodify command. */
/* <dataref> or 'inline' */
if (*p == ':') {
- oe = find_mark(parse_mark_ref_space(&p));
+ oe = find_mark(marks, parse_mark_ref_space(&p));
oidcpy(&oid, &oe->idx.oid);
} else if (skip_prefix(p, "inline ", &p)) {
inline_data = 1;
oe = NULL; /* not used with inline_data, but makes gcc happy */
} else {
- if (parse_oid_hex(p, &oid, &p))
+ if (parse_mapped_oid_hex(p, &oid, &p))
die("Invalid dataref: %s", command_buf.buf);
oe = find_object(&oid);
if (*p++ != ' ')
@@ -2409,7 +2461,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
oidcpy(&commit_oid, &s->oid);
} else if (*p == ':') {
uintmax_t commit_mark = parse_mark_ref_eol(p);
- struct object_entry *commit_oe = find_mark(commit_mark);
+ struct object_entry *commit_oe = find_mark(marks, commit_mark);
if (commit_oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", commit_mark);
oidcpy(&commit_oid, &commit_oe->idx.oid);
@@ -2513,7 +2565,7 @@ static int parse_objectish(struct branch *b, const char *objectish)
oidcpy(&b->branch_tree.versions[1].oid, t);
} else if (*objectish == ':') {
uintmax_t idnum = parse_mark_ref_eol(objectish);
- struct object_entry *oe = find_mark(idnum);
+ struct object_entry *oe = find_mark(marks, idnum);
if (oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", idnum);
if (!oideq(&b->oid, &oe->idx.oid)) {
@@ -2577,7 +2629,7 @@ static struct hash_list *parse_merge(unsigned int *count)
oidcpy(&n->oid, &s->oid);
else if (*from == ':') {
uintmax_t idnum = parse_mark_ref_eol(from);
- struct object_entry *oe = find_mark(idnum);
+ struct object_entry *oe = find_mark(marks, idnum);
if (oe->type != OBJ_COMMIT)
die("Mark :%" PRIuMAX " not a commit", idnum);
oidcpy(&n->oid, &oe->idx.oid);
@@ -2751,7 +2803,7 @@ static void parse_new_tag(const char *arg)
} else if (*from == ':') {
struct object_entry *oe;
from_mark = parse_mark_ref_eol(from);
- oe = find_mark(from_mark);
+ oe = find_mark(marks, from_mark);
type = oe->type;
oidcpy(&oid, &oe->idx.oid);
} else if (!get_oid(from, &oid)) {
@@ -2909,7 +2961,7 @@ static void parse_get_mark(const char *p)
if (*p != ':')
die("Not a mark: %s", p);
- oe = find_mark(parse_mark_ref_eol(p));
+ oe = find_mark(marks, parse_mark_ref_eol(p));
if (!oe)
die("Unknown mark: %s", command_buf.buf);
@@ -2924,12 +2976,12 @@ static void parse_cat_blob(const char *p)
/* cat-blob SP <object> LF */
if (*p == ':') {
- oe = find_mark(parse_mark_ref_eol(p));
+ oe = find_mark(marks, parse_mark_ref_eol(p));
if (!oe)
die("Unknown mark: %s", command_buf.buf);
oidcpy(&oid, &oe->idx.oid);
} else {
- if (parse_oid_hex(p, &oid, &p))
+ if (parse_mapped_oid_hex(p, &oid, &p))
die("Invalid dataref: %s", command_buf.buf);
if (*p)
die("Garbage after SHA1: %s", command_buf.buf);
@@ -2993,18 +3045,54 @@ static struct object_entry *dereference(struct object_entry *oe,
return find_object(oid);
}
+/*
+ * each_mark_fn_t callback used while building sub_oid_map.  `object` is the
+ * object ID stored under `mark` in a "from" set and `cbp` is the matching
+ * "to" set.  The ID found under the same mark in the "to" set becomes the
+ * map value, unless the "from" ID already has an entry, which is kept.
+ */
+static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp)
+{
+	struct mark_set *to_set = cbp;
+	struct object_id *src = object;
+	/* Looked up before touching the map, exactly as the mark is visited. */
+	struct object_id *dst = find_mark(to_set, mark);
+	int put_ret;
+	khiter_t slot = kh_put_oid_map(sub_oid_map, *src, &put_ret);
+
+	/* put_ret == 0 means the key was already present; leave it alone. */
+	if (put_ret != 0)
+		kh_value(sub_oid_map, slot) = dst;
+}
+
+/* Add to sub_oid_map every mark of `from`, paired with the same-numbered mark in `to`. */
+static void build_mark_map_one(struct mark_set *from, struct mark_set *to)
+{
+	each_mark_fn_t add_pair = insert_mapped_mark;
+
+	for_each_mark(from, 0, add_pair, to);
+}
+
+/*
+ * Create sub_oid_map and fill it from the submodule mark lists.  For each
+ * name in `from`, the item with the same name is looked up in `to`; it is
+ * fatal if either side has no mark set attached.
+ */
+static void build_mark_map(struct string_list *from, struct string_list *to)
+{
+	struct string_list_item *src;
+
+	sub_oid_map = kh_init_oid_map();
+
+	for_each_string_list_item(src, from) {
+		struct string_list_item *dst = string_list_lookup(to, src->string);
+
+		if (!src->util)
+			die(_("Missing from marks for submodule '%s'"), src->string);
+		if (!dst || !dst->util)
+			die(_("Missing to marks for submodule '%s'"), src->string);
+		build_mark_map_one(src->util, dst->util);
+	}
+}
+
static struct object_entry *parse_treeish_dataref(const char **p)
{
struct object_id oid;
struct object_entry *e;
if (**p == ':') { /* <mark> */
- e = find_mark(parse_mark_ref_space(p));
+ e = find_mark(marks, parse_mark_ref_space(p));
if (!e)
die("Unknown mark: %s", command_buf.buf);
oidcpy(&oid, &e->idx.oid);
} else { /* <sha1> */
- if (parse_oid_hex(*p, &oid, p))
+ if (parse_mapped_oid_hex(*p, &oid, p))
die("Invalid dataref: %s", command_buf.buf);
e = find_object(&oid);
if (*(*p)++ != ' ')
@@ -3130,7 +3218,7 @@ static void parse_alias(void)
die(_("Expected 'to' command, got %s"), command_buf.buf);
e = find_object(&b.oid);
assert(e);
- insert_mark(next_mark, e);
+ insert_mark(marks, next_mark, e);
}
static char* make_fast_import_path(const char *path)
@@ -3210,6 +3298,26 @@ static void option_export_pack_edges(const char *edges)
pack_edges = xfopen(edges, "a");
}
+/*
+ * Handle one rewrite-submodules-from= / rewrite-submodules-to= argument.
+ *
+ * `arg` has the form "name:filename".  A fresh mark set is allocated,
+ * attached to `list` under `name`, and populated from the marks file
+ * `filename`, storing a copy of each object ID (insert_oid_entry).
+ * Dies if `arg` contains no ':' or the file cannot be opened.
+ *
+ * `list` is expected to duplicate its keys (both lists passed by
+ * parse_one_feature() are STRING_LIST_INIT_DUP), so the scratch copy of
+ * `arg` made here is released before returning instead of being leaked.
+ */
+static void option_rewrite_submodules(const char *arg, struct string_list *list)
+{
+	struct mark_set *ms;
+	FILE *fp;
+	char *s = xstrdup(arg);
+	char *f = strchr(s, ':');
+	if (!f)
+		die(_("Expected format name:filename for submodule rewrite option"));
+	/* Split in place: s is now the name, f the filename. */
+	*f = '\0';
+	f++;
+	ms = xcalloc(1, sizeof(*ms));
+	string_list_insert(list, s)->util = ms;
+
+	fp = fopen(f, "r");
+	if (!fp)
+		die_errno("cannot read '%s'", f);
+	read_mark_file(ms, fp, insert_oid_entry);
+	fclose(fp);
+
+	/* f points into s, so free only after the last use of either. */
+	free(s);
+}
+
static int parse_one_option(const char *option)
{
if (skip_prefix(option, "max-pack-size=", &option)) {
@@ -3272,6 +3380,11 @@ static int parse_one_feature(const char *feature, int from_stream)
option_export_marks(arg);
} else if (!strcmp(feature, "alias")) {
; /* Don't die - this feature is supported */
+ } else if (skip_prefix(feature, "rewrite-submodules-to=", &arg)) {
+ option_rewrite_submodules(arg, &sub_marks_to);
+ } else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
+ option_rewrite_submodules(arg, &sub_marks_from);
+ } else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
} else if (!strcmp(feature, "get-mark")) {
; /* Don't die - this feature is supported */
} else if (!strcmp(feature, "cat-blob")) {
@@ -3377,6 +3490,7 @@ static void parse_argv(void)
seen_data_command = 1;
if (import_marks_file)
read_marks();
+ build_mark_map(&sub_marks_from, &sub_marks_to);
}
int cmd_main(int argc, const char **argv)