diff options
Diffstat (limited to 'fast-import.c')
-rw-r--r-- | fast-import.c | 332 |
1 files changed, 237 insertions, 95 deletions
diff --git a/fast-import.c b/fast-import.c index b8b65a801c..0dfa14dc8c 100644 --- a/fast-import.c +++ b/fast-import.c @@ -18,6 +18,7 @@ #include "object-store.h" #include "mem-pool.h" #include "commit-reach.h" +#include "khash.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1) @@ -38,12 +39,28 @@ struct object_entry { struct pack_idx_entry idx; - struct object_entry *next; + struct hashmap_entry ent; uint32_t type : TYPE_BITS, pack_id : PACK_ID_BITS, depth : DEPTH_BITS; }; +static int object_entry_hashcmp(const void *map_data, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, + const void *keydata) +{ + const struct object_id *oid = keydata; + const struct object_entry *e1, *e2; + + e1 = container_of(eptr, const struct object_entry, ent); + if (oid) + return oidcmp(&e1->idx.oid, oid); + + e2 = container_of(entry_or_key, const struct object_entry, ent); + return oidcmp(&e1->idx.oid, &e2->idx.oid); +} + struct object_entry_pool { struct object_entry_pool *next_pool; struct object_entry *next_free; @@ -53,6 +70,7 @@ struct object_entry_pool { struct mark_set { union { + struct object_id *oids[1024]; struct object_entry *marked[1024]; struct mark_set *sets[1024]; } data; @@ -121,6 +139,7 @@ struct hash_list { typedef enum { WHENSPEC_RAW = 1, + WHENSPEC_RAW_PERMISSIVE, WHENSPEC_RFC2822, WHENSPEC_NOW } whenspec_type; @@ -131,6 +150,9 @@ struct recent_command { char *buf; }; +typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark); +typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp); + /* Configured limits on output */ static unsigned long max_depth = 50; static off_t max_packsize; @@ -173,7 +195,7 @@ static off_t pack_size; /* Table of objects we've written. */ static unsigned int object_entry_alloc = 5000; static struct object_entry_pool *blocks; -static struct object_entry *object_table[1 << 16]; +static struct hashmap object_table; static struct mark_set *marks; static const char *export_marks_file; static const char *import_marks_file; @@ -222,6 +244,11 @@ static int allow_unsafe_features; /* Signal handling */ static volatile sig_atomic_t checkpoint_requested; +/* Submodule marks */ +static struct string_list sub_marks_from = STRING_LIST_INIT_DUP; +static struct string_list sub_marks_to = STRING_LIST_INIT_DUP; +static kh_oid_map_t *sub_oid_map; + /* Where to write output of cat-blob commands */ static int cat_blob_fd = STDOUT_FILENO; @@ -230,6 +257,29 @@ static void parse_get_mark(const char *p); static void parse_cat_blob(const char *p); static void parse_ls(const char *p, struct branch *b); +static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p) +{ + uintmax_t k; + if (m->shift) { + for (k = 0; k < 1024; k++) { + if (m->data.sets[k]) + for_each_mark(m->data.sets[k], base + (k << m->shift), callback, p); + } + } else { + for (k = 0; k < 1024; k++) { + if (m->data.marked[k]) + callback(base + k, m->data.marked[k], p); + } + } +} + +static void dump_marks_fn(uintmax_t mark, void *object, void *cbp) { + struct object_entry *e = object; + FILE *f = cbp; + + fprintf(f, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&e->idx.oid)); +} + static void write_branch_report(FILE *rpt, struct branch *b) { fprintf(rpt, "%s:\n", b->name); @@ -258,8 +308,6 @@ static void write_branch_report(FILE *rpt, struct branch *b) fputc('\n', rpt); } -static void dump_marks_helper(FILE *, uintmax_t, struct mark_set *); - static void write_crash_report(const char *err) { char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid()); @@ -338,7 +386,7 @@ static void write_crash_report(const char *err) if (export_marks_file) fprintf(rpt, " exported to %s\n", export_marks_file); else - dump_marks_helper(rpt, 0, marks); + for_each_mark(marks, 0, dump_marks_fn, rpt); fputc('\n', rpt); fputs("-------------------\n", rpt); @@ -424,44 +472,37 @@ static struct object_entry *new_object(struct object_id *oid) static struct object_entry *find_object(struct object_id *oid) { - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; - struct object_entry *e; - for (e = object_table[h]; e; e = e->next) - if (oideq(oid, &e->idx.oid)) - return e; - return NULL; + return hashmap_get_entry_from_hash(&object_table, oidhash(oid), oid, + struct object_entry, ent); } static struct object_entry *insert_object(struct object_id *oid) { - unsigned int h = oid->hash[0] << 8 | oid->hash[1]; - struct object_entry *e = object_table[h]; + struct object_entry *e; + unsigned int hash = oidhash(oid); - while (e) { - if (oideq(oid, &e->idx.oid)) - return e; - e = e->next; + e = hashmap_get_entry_from_hash(&object_table, hash, oid, + struct object_entry, ent); + if (!e) { + e = new_object(oid); + e->idx.offset = 0; + hashmap_entry_init(&e->ent, hash); + hashmap_add(&object_table, &e->ent); } - e = new_object(oid); - e->next = object_table[h]; - e->idx.offset = 0; - object_table[h] = e; return e; } static void invalidate_pack_id(unsigned int id) { - unsigned int h; unsigned long lu; struct tag *t; + struct hashmap_iter iter; + struct object_entry *e; - for (h = 0; h < ARRAY_SIZE(object_table); h++) { - struct object_entry *e; - - for (e = object_table[h]; e; e = e->next) - if (e->pack_id == id) - e->pack_id = MAX_PACK_ID; + hashmap_for_each_entry(&object_table, &iter, e, ent) { + if (e->pack_id == id) + e->pack_id = MAX_PACK_ID; } for (lu = 0; lu < branch_table_sz; lu++) { @@ -493,9 +534,8 @@ static char *pool_strdup(const char *s) return r; } -static void insert_mark(uintmax_t idnum, struct object_entry *oe) +static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe) { - struct mark_set *s = marks; while ((idnum >> s->shift) >= 1024) { s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); s->shift = marks->shift + 10; @@ -516,10 +556,9 @@ static void insert_mark(uintmax_t idnum, struct object_entry *oe) s->data.marked[idnum] = oe; } -static struct object_entry *find_mark(uintmax_t idnum) +static void *find_mark(struct mark_set *s, uintmax_t idnum) { uintmax_t orig_idnum = idnum; - struct mark_set *s = marks; struct object_entry *oe = NULL; if ((idnum >> s->shift) < 1024) { while (s && s->shift) { @@ -919,7 +958,7 @@ static int store_object( e = insert_object(&oid); if (mark) - insert_mark(mark, e); + insert_mark(marks, mark, e); if (e->idx.offset) { duplicate_count_by_type[type]++; return 1; @@ -1117,7 +1156,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) e = insert_object(&oid); if (mark) - insert_mark(mark, e); + insert_mark(marks, mark, e); if (e->idx.offset) { duplicate_count_by_type[OBJ_BLOB]++; @@ -1655,26 +1694,6 @@ static void dump_tags(void) strbuf_release(&err); } -static void dump_marks_helper(FILE *f, - uintmax_t base, - struct mark_set *m) -{ - uintmax_t k; - if (m->shift) { - for (k = 0; k < 1024; k++) { - if (m->data.sets[k]) - dump_marks_helper(f, base + (k << m->shift), - m->data.sets[k]); - } - } else { - for (k = 0; k < 1024; k++) { - if (m->data.marked[k]) - fprintf(f, ":%" PRIuMAX " %s\n", base + k, - oid_to_hex(&m->data.marked[k]->idx.oid)); - } - } -} - static void dump_marks(void) { struct lock_file mark_lock = LOCK_INIT; @@ -1704,7 +1723,7 @@ static void dump_marks(void) return; } - dump_marks_helper(f, 0, marks); + for_each_mark(marks, 0, dump_marks_fn, f); if (commit_lock_file(&mark_lock)) { failure |= error_errno("Unable to write file %s", export_marks_file); @@ -1712,21 +1731,38 @@ static void dump_marks(void) } } -static void read_marks(void) +static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark) +{ + struct object_entry *e; + e = find_object(oid); + if (!e) { + enum object_type type = oid_object_info(the_repository, + oid, NULL); + if (type < 0) + die("object not found: %s", oid_to_hex(oid)); + e = insert_object(oid); + e->type = type; + e->pack_id = MAX_PACK_ID; + e->idx.offset = 1; /* just not zero! */ + } + insert_mark(s, mark, e); +} + +static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark) +{ + insert_mark(s, mark, xmemdupz(oid, sizeof(*oid))); +} + +static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter) { char line[512]; - FILE *f = fopen(import_marks_file, "r"); - if (f) - ; - else if (import_marks_file_ignore_missing && errno == ENOENT) - goto done; /* Marks file does not exist */ - else - die_errno("cannot read '%s'", import_marks_file); while (fgets(line, sizeof(line), f)) { uintmax_t mark; char *end; struct object_id oid; - struct object_entry *e; + + /* Ensure SHA-1 objects are padded with zeros. */ + memset(oid.hash, 0, sizeof(oid.hash)); end = strchr(line, '\n'); if (line[0] != ':' || !end) @@ -1734,21 +1770,23 @@ static void read_marks(void) *end = 0; mark = strtoumax(line + 1, &end, 10); if (!mark || end == line + 1 - || *end != ' ' || get_oid_hex(end + 1, &oid)) + || *end != ' ' + || get_oid_hex_any(end + 1, &oid) == GIT_HASH_UNKNOWN) die("corrupt mark line: %s", line); - e = find_object(&oid); - if (!e) { - enum object_type type = oid_object_info(the_repository, - &oid, NULL); - if (type < 0) - die("object not found: %s", oid_to_hex(&oid)); - e = insert_object(&oid); - e->type = type; - e->pack_id = MAX_PACK_ID; - e->idx.offset = 1; /* just not zero! */ - } - insert_mark(mark, e); + inserter(s, &oid, mark); } +} + +static void read_marks(void) +{ + FILE *f = fopen(import_marks_file, "r"); + if (f) + ; + else if (import_marks_file_ignore_missing && errno == ENOENT) + goto done; /* Marks file does not exist */ + else + die_errno("cannot read '%s'", import_marks_file); + read_mark_file(marks, f, insert_object_entry); fclose(f); done: import_marks_file_done = 1; @@ -1874,7 +1912,7 @@ static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res) return 1; } -static int validate_raw_date(const char *src, struct strbuf *result) +static int validate_raw_date(const char *src, struct strbuf *result, int strict) { const char *orig_src = src; char *endp; @@ -1883,7 +1921,11 @@ static int validate_raw_date(const char *src, struct strbuf *result) errno = 0; num = strtoul(src, &endp, 10); - /* NEEDSWORK: perhaps check for reasonable values? */ + /* + * NEEDSWORK: perhaps check for reasonable values? For example, we + * could error on values representing times more than a + * day in the future. + */ if (errno || endp == src || *endp != ' ') return -1; @@ -1892,7 +1934,13 @@ static int validate_raw_date(const char *src, struct strbuf *result) return -1; num = strtoul(src + 1, &endp, 10); - if (errno || endp == src + 1 || *endp || 1400 < num) + /* + * NEEDSWORK: check for brokenness other than num > 1400, such as + * (num % 100) >= 60, or ((num % 100) % 15) != 0 ? + */ + if (errno || endp == src + 1 || *endp || /* did not parse */ + (strict && (1400 < num)) /* parsed a broken timezone */ + ) return -1; strbuf_addstr(result, orig_src); @@ -1926,7 +1974,11 @@ static char *parse_ident(const char *buf) switch (whenspec) { case WHENSPEC_RAW: - if (validate_raw_date(ltgt, &ident) < 0) + if (validate_raw_date(ltgt, &ident, 1) < 0) + die("Invalid raw date \"%s\" in ident: %s", ltgt, buf); + break; + case WHENSPEC_RAW_PERMISSIVE: + if (validate_raw_date(ltgt, &ident, 0) < 0) die("Invalid raw date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_RFC2822: @@ -2134,6 +2186,30 @@ static uintmax_t change_note_fanout(struct tree_entry *root, return do_change_note_fanout(root, root, hex_oid, 0, path, 0, fanout); } +static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end) +{ + int algo; + khiter_t it; + + /* Make SHA-1 object IDs have all-zero padding. */ + memset(oid->hash, 0, sizeof(oid->hash)); + + algo = parse_oid_hex_any(hex, oid, end); + if (algo == GIT_HASH_UNKNOWN) + return -1; + + it = kh_get_oid_map(sub_oid_map, *oid); + /* No such object? */ + if (it == kh_end(sub_oid_map)) { + /* If we're using the same algorithm, pass it through. */ + if (hash_algos[algo].format_id == the_hash_algo->format_id) + return 0; + return -1; + } + oidcpy(oid, kh_value(sub_oid_map, it)); + return 0; +} + /* * Given a pointer into a string, parse a mark reference: * @@ -2214,13 +2290,13 @@ static void file_change_m(const char *p, struct branch *b) } if (*p == ':') { - oe = find_mark(parse_mark_ref_space(&p)); + oe = find_mark(marks, parse_mark_ref_space(&p)); oidcpy(&oid, &oe->idx.oid); } else if (skip_prefix(p, "inline ", &p)) { inline_data = 1; oe = NULL; /* not used with inline_data, but makes gcc happy */ } else { - if (parse_oid_hex(p, &oid, &p)) + if (parse_mapped_oid_hex(p, &oid, &p)) die("Invalid dataref: %s", command_buf.buf); oe = find_object(&oid); if (*p++ != ' ') @@ -2388,13 +2464,13 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa /* Now parse the notemodify command. */ /* <dataref> or 'inline' */ if (*p == ':') { - oe = find_mark(parse_mark_ref_space(&p)); + oe = find_mark(marks, parse_mark_ref_space(&p)); oidcpy(&oid, &oe->idx.oid); } else if (skip_prefix(p, "inline ", &p)) { inline_data = 1; oe = NULL; /* not used with inline_data, but makes gcc happy */ } else { - if (parse_oid_hex(p, &oid, &p)) + if (parse_mapped_oid_hex(p, &oid, &p)) die("Invalid dataref: %s", command_buf.buf); oe = find_object(&oid); if (*p++ != ' ') @@ -2409,7 +2485,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa oidcpy(&commit_oid, &s->oid); } else if (*p == ':') { uintmax_t commit_mark = parse_mark_ref_eol(p); - struct object_entry *commit_oe = find_mark(commit_mark); + struct object_entry *commit_oe = find_mark(marks, commit_mark); if (commit_oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", commit_mark); oidcpy(&commit_oid, &commit_oe->idx.oid); @@ -2513,7 +2589,7 @@ static int parse_objectish(struct branch *b, const char *objectish) oidcpy(&b->branch_tree.versions[1].oid, t); } else if (*objectish == ':') { uintmax_t idnum = parse_mark_ref_eol(objectish); - struct object_entry *oe = find_mark(idnum); + struct object_entry *oe = find_mark(marks, idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); if (!oideq(&b->oid, &oe->idx.oid)) { @@ -2577,7 +2653,7 @@ static struct hash_list *parse_merge(unsigned int *count) oidcpy(&n->oid, &s->oid); else if (*from == ':') { uintmax_t idnum = parse_mark_ref_eol(from); - struct object_entry *oe = find_mark(idnum); + struct object_entry *oe = find_mark(marks, idnum); if (oe->type != OBJ_COMMIT) die("Mark :%" PRIuMAX " not a commit", idnum); oidcpy(&n->oid, &oe->idx.oid); @@ -2751,7 +2827,7 @@ static void parse_new_tag(const char *arg) } else if (*from == ':') { struct object_entry *oe; from_mark = parse_mark_ref_eol(from); - oe = find_mark(from_mark); + oe = find_mark(marks, from_mark); type = oe->type; oidcpy(&oid, &oe->idx.oid); } else if (!get_oid(from, &oid)) { @@ -2909,7 +2985,7 @@ static void parse_get_mark(const char *p) if (*p != ':') die("Not a mark: %s", p); - oe = find_mark(parse_mark_ref_eol(p)); + oe = find_mark(marks, parse_mark_ref_eol(p)); if (!oe) die("Unknown mark: %s", command_buf.buf); @@ -2924,12 +3000,12 @@ static void parse_cat_blob(const char *p) /* cat-blob SP <object> LF */ if (*p == ':') { - oe = find_mark(parse_mark_ref_eol(p)); + oe = find_mark(marks, parse_mark_ref_eol(p)); if (!oe) die("Unknown mark: %s", command_buf.buf); oidcpy(&oid, &oe->idx.oid); } else { - if (parse_oid_hex(p, &oid, &p)) + if (parse_mapped_oid_hex(p, &oid, &p)) die("Invalid dataref: %s", command_buf.buf); if (*p) die("Garbage after SHA1: %s", command_buf.buf); @@ -2993,18 +3069,54 @@ static struct object_entry *dereference(struct object_entry *oe, return find_object(oid); } +static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp) +{ + struct object_id *fromoid = object; + struct object_id *tooid = find_mark(cbp, mark); + int ret; + khiter_t it; + + it = kh_put_oid_map(sub_oid_map, *fromoid, &ret); + /* We've already seen this object. */ + if (ret == 0) + return; + kh_value(sub_oid_map, it) = tooid; +} + +static void build_mark_map_one(struct mark_set *from, struct mark_set *to) +{ + for_each_mark(from, 0, insert_mapped_mark, to); +} + +static void build_mark_map(struct string_list *from, struct string_list *to) +{ + struct string_list_item *fromp, *top; + + sub_oid_map = kh_init_oid_map(); + + for_each_string_list_item(fromp, from) { + top = string_list_lookup(to, fromp->string); + if (!fromp->util) { + die(_("Missing from marks for submodule '%s'"), fromp->string); + } else if (!top || !top->util) { + die(_("Missing to marks for submodule '%s'"), fromp->string); + } + build_mark_map_one(fromp->util, top->util); + } +} + static struct object_entry *parse_treeish_dataref(const char **p) { struct object_id oid; struct object_entry *e; if (**p == ':') { /* <mark> */ - e = find_mark(parse_mark_ref_space(p)); + e = find_mark(marks, parse_mark_ref_space(p)); if (!e) die("Unknown mark: %s", command_buf.buf); oidcpy(&oid, &e->idx.oid); } else { /* <sha1> */ - if (parse_oid_hex(*p, &oid, p)) + if (parse_mapped_oid_hex(*p, &oid, p)) die("Invalid dataref: %s", command_buf.buf); e = find_object(&oid); if (*(*p)++ != ' ') @@ -3130,7 +3242,7 @@ static void parse_alias(void) die(_("Expected 'to' command, got %s"), command_buf.buf); e = find_object(&b.oid); assert(e); - insert_mark(next_mark, e); + insert_mark(marks, next_mark, e); } static char* make_fast_import_path(const char *path) @@ -3161,6 +3273,8 @@ static void option_date_format(const char *fmt) { if (!strcmp(fmt, "raw")) whenspec = WHENSPEC_RAW; + else if (!strcmp(fmt, "raw-permissive")) + whenspec = WHENSPEC_RAW_PERMISSIVE; else if (!strcmp(fmt, "rfc2822")) whenspec = WHENSPEC_RFC2822; else if (!strcmp(fmt, "now")) @@ -3210,6 +3324,26 @@ static void option_export_pack_edges(const char *edges) pack_edges = xfopen(edges, "a"); } +static void option_rewrite_submodules(const char *arg, struct string_list *list) +{ + struct mark_set *ms; + FILE *fp; + char *s = xstrdup(arg); + char *f = strchr(s, ':'); + if (!f) + die(_("Expected format name:filename for submodule rewrite option")); + *f = '\0'; + f++; + ms = xcalloc(1, sizeof(*ms)); + string_list_insert(list, s)->util = ms; + + fp = fopen(f, "r"); + if (!fp) + die_errno("cannot read '%s'", f); + read_mark_file(ms, fp, insert_oid_entry); + fclose(fp); +} + static int parse_one_option(const char *option) { if (skip_prefix(option, "max-pack-size=", &option)) { @@ -3272,6 +3406,11 @@ static int parse_one_feature(const char *feature, int from_stream) option_export_marks(arg); } else if (!strcmp(feature, "alias")) { ; /* Don't die - this feature is supported */ + } else if (skip_prefix(feature, "rewrite-submodules-to=", &arg)) { + option_rewrite_submodules(arg, &sub_marks_to); + } else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) { + option_rewrite_submodules(arg, &sub_marks_from); + } else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) { } else if (!strcmp(feature, "get-mark")) { ; /* Don't die - this feature is supported */ } else if (!strcmp(feature, "cat-blob")) { @@ -3377,6 +3516,7 @@ static void parse_argv(void) seen_data_command = 1; if (import_marks_file) read_marks(); + build_mark_map(&sub_marks_from, &sub_marks_to); } int cmd_main(int argc, const char **argv) @@ -3397,6 +3537,8 @@ int cmd_main(int argc, const char **argv) avail_tree_table = xcalloc(avail_tree_table_sz, sizeof(struct avail_tree_content*)); marks = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set)); + hashmap_init(&object_table, object_entry_hashcmp, NULL, 0); + /* * We don't parse most options until after we've seen the set of * "feature" lines at the start of the stream (which allows the command |