diff options
Diffstat (limited to 'builtin/fast-export.c')
-rw-r--r-- | builtin/fast-export.c | 325 |
1 files changed, 245 insertions, 80 deletions
diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 9bd8a14b57..85868162ee 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -31,13 +31,18 @@ static const char *fast_export_usage[] = { }; static int progress; -static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT; -static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR; +static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; +static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; +static enum { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT; static int fake_missing_tagger; static int use_done_feature; static int no_data; static int full_tree; +static int reference_excluded_commits; +static int show_original_ids; +static int mark_tags; static struct string_list extra_refs = STRING_LIST_INIT_NODUP; +static struct string_list tag_refs = STRING_LIST_INIT_NODUP; static struct refspec refspecs = REFSPEC_INIT_FETCH; static int anonymize; static struct revision_sources revision_sources; @@ -46,7 +51,7 @@ static int parse_opt_signed_tag_mode(const struct option *opt, const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - signed_tag_mode = ABORT; + signed_tag_mode = SIGNED_TAG_ABORT; else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) signed_tag_mode = VERBATIM; else if (!strcmp(arg, "warn")) @@ -64,7 +69,7 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt, const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - tag_of_filtered_mode = ERROR; + tag_of_filtered_mode = TAG_FILTERING_ABORT; else if (!strcmp(arg, "drop")) tag_of_filtered_mode = DROP; else if (!strcmp(arg, "rewrite")) @@ -74,6 +79,31 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt, return 0; } +static int parse_opt_reencode_mode(const struct option *opt, + const char *arg, int unset) +{ + if (unset) { + reencode_mode = REENCODE_ABORT; + return 0; + } + + switch (git_parse_maybe_bool(arg)) { + case 0: + reencode_mode = REENCODE_NO; + break; + case 1: + reencode_mode = REENCODE_YES; + break; + default: + if (!strcasecmp(arg, "abort")) + reencode_mode = REENCODE_ABORT; + else + return error("Unknown reencoding mode: %s", arg); + } + + return 0; +} + static struct decoration idnums; static uint32_t last_idnum; @@ -97,10 +127,15 @@ struct anonymized_entry { }; static int anonymized_entry_cmp(const void *unused_cmp_data, - const void *va, const void *vb, + const struct hashmap_entry *eptr, + const struct hashmap_entry *entry_or_key, const void *unused_keydata) { - const struct anonymized_entry *a = va, *b = vb; + const struct anonymized_entry *a, *b; + + a = container_of(eptr, const struct anonymized_entry, hash); + b = container_of(entry_or_key, const struct anonymized_entry, hash); + return a->orig_len != b->orig_len || memcmp(a->orig, b->orig, a->orig_len); } @@ -119,10 +154,10 @@ static const void *anonymize_mem(struct hashmap *map, if (!map->cmpfn) hashmap_init(map, anonymized_entry_cmp, NULL, 0); - hashmap_entry_init(&key, memhash(orig, *len)); + hashmap_entry_init(&key.hash, memhash(orig, *len)); key.orig = orig; key.orig_len = *len; - ret = hashmap_get(map, &key, NULL); + ret = hashmap_get_entry(map, &key, hash, NULL); if (!ret) { ret = xmalloc(sizeof(*ret)); @@ -131,7 +166,7 @@ static const void *anonymize_mem(struct hashmap *map, ret->orig_len = *len; ret->anon = generate(orig, len); ret->anon_len = *len; - hashmap_put(map, ret); + hashmap_put(map, &ret->hash); } *len = ret->anon_len; @@ -187,6 +222,22 @@ static int get_object_mark(struct object *object) return ptr_to_mark(decoration); } +static struct commit *rewrite_commit(struct commit *p) +{ + for (;;) { + if (p->parents && p->parents->next) + break; + if (p->object.flags & UNINTERESTING) + break; + if (!(p->object.flags & TREESAME)) + break; + if (!p->parents) + return NULL; + p = p->parents->item; + } + return p; +} + static void show_progress(void) { static int counter = 0; @@ -230,7 +281,7 @@ static void export_blob(const struct object_id *oid) if (is_null_oid(oid)) return; - object = lookup_object(the_repository, oid->hash); + object = lookup_object(the_repository, oid); if (object && object->flags & SHOWN) return; @@ -242,8 +293,9 @@ static void export_blob(const struct object_id *oid) buf = read_object_file(oid, &type, &size); if (!buf) die("could not read blob %s", oid_to_hex(oid)); - if (check_object_signature(oid, buf, size, type_name(type)) < 0) - die("sha1 mismatch in blob %s", oid_to_hex(oid)); + if (check_object_signature(the_repository, oid, buf, size, + type_name(type)) < 0) + die("oid mismatch in blob %s", oid_to_hex(oid)); object = parse_object_buffer(the_repository, oid, type, size, buf, &eaten); } @@ -253,7 +305,10 @@ static void export_blob(const struct object_id *oid) mark_next_object(object); - printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size); + printf("blob\nmark :%"PRIu32"\n", last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(oid)); + printf("data %"PRIuMAX"\n", (uintmax_t)size); if (size && fwrite(buf, size, 1, stdout) != 1) die_errno("could not write blob '%s'", oid_to_hex(oid)); printf("\n"); @@ -330,17 +385,18 @@ static void print_path(const char *path) static void *generate_fake_oid(const void *old, size_t *len) { - static uint32_t counter = 1; /* avoid null sha1 */ - unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1); - put_be32(out + GIT_SHA1_RAWSZ - 4, counter++); + static uint32_t counter = 1; /* avoid null oid */ + const unsigned hashsz = the_hash_algo->rawsz; + unsigned char *out = xcalloc(hashsz, 1); + put_be32(out + hashsz - 4, counter++); return out; } -static const unsigned char *anonymize_sha1(const struct object_id *oid) +static const struct object_id *anonymize_oid(const struct object_id *oid) { - static struct hashmap sha1s; - size_t len = GIT_SHA1_RAWSZ; - return anonymize_mem(&sha1s, generate_fake_oid, oid, &len); + static struct hashmap objs; + size_t len = the_hash_algo->rawsz; + return anonymize_mem(&objs, generate_fake_oid, oid, &len); } static void show_filemodify(struct diff_queue_struct *q, @@ -384,7 +440,7 @@ static void show_filemodify(struct diff_queue_struct *q, string_list_insert(changed, spec->path); putchar('\n'); - if (!oidcmp(&ospec->oid, &spec->oid) && + if (oideq(&ospec->oid, &spec->oid) && ospec->mode == spec->mode) break; } @@ -399,12 +455,12 @@ static void show_filemodify(struct diff_queue_struct *q, */ if (no_data || S_ISGITLINK(spec->mode)) printf("M %06o %s ", spec->mode, - sha1_to_hex(anonymize ? - anonymize_sha1(&spec->oid) : - spec->oid.hash)); + oid_to_hex(anonymize ? + anonymize_oid(&spec->oid) : + &spec->oid)); else { struct object *object = lookup_object(the_repository, - spec->oid.hash); + &spec->oid); printf("M %06o :%d ", spec->mode, get_object_mark(object)); } @@ -430,7 +486,7 @@ static const char *find_encoding(const char *begin, const char *end) bol = memmem(begin, end ? end - begin : strlen(begin), needle, strlen(needle)); if (!bol) - return git_commit_encoding; + return NULL; bol += strlen(needle); eol = strchrnul(bol, '\n'); *eol = '\0'; @@ -579,7 +635,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, message += 2; if (commit->parents && - get_object_mark(&commit->parents->item->object) != 0 && + (get_object_mark(&commit->parents->item->object) != 0 || + reference_excluded_commits) && !full_tree) { parse_commit_or_die(commit->parents->item); diff_tree_oid(get_commit_tree_oid(commit->parents->item), @@ -595,6 +652,13 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, export_blob(&diff_queued_diff.queue[i]->two->oid); refname = *revision_sources_at(&revision_sources, commit); + /* + * FIXME: string_list_remove() below for each ref is overall + * O(N^2). Compared to a history walk and diffing trees, this is + * just lost in the noise in practice. However, theoretically a + * repo may have enough refs for this to become slow. + */ + string_list_remove(&extra_refs, refname, 0); if (anonymize) { refname = anonymize_refname(refname); anonymize_ident_line(&committer, &committer_end); @@ -602,16 +666,32 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, } mark_next_object(&commit->object); - if (anonymize) + if (anonymize) { reencoded = anonymize_commit_message(message); - else if (!is_encoding_utf8(encoding)) - reencoded = reencode_string(message, "UTF-8", encoding); + } else if (encoding) { + switch(reencode_mode) { + case REENCODE_YES: + reencoded = reencode_string(message, "UTF-8", encoding); + break; + case REENCODE_NO: + break; + case REENCODE_ABORT: + die("Encountered commit-specific encoding %s in commit " + "%s; use --reencode=[yes|no] to handle it", + encoding, oid_to_hex(&commit->object.oid)); + } + } if (!commit->parents) printf("reset %s\n", refname); - printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s", - refname, last_idnum, + printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&commit->object.oid)); + printf("%.*s\n%.*s\n", (int)(author_end - author), author, - (int)(committer_end - committer), committer, + (int)(committer_end - committer), committer); + if (!reencoded && encoding) + printf("encoding %s\n", encoding); + printf("data %u\n%s", (unsigned)(reencoded ? strlen(reencoded) : message ? strlen(message) : 0), @@ -620,13 +700,21 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, unuse_commit_buffer(commit, commit_buffer); for (i = 0, p = commit->parents; p; p = p->next) { - int mark = get_object_mark(&p->item->object); - if (!mark) + struct object *obj = &p->item->object; + int mark = get_object_mark(obj); + + if (!mark && !reference_excluded_commits) continue; if (i == 0) - printf("from :%d\n", mark); + printf("from "); + else + printf("merge "); + if (mark) + printf(":%d\n", mark); else - printf("merge :%d\n", mark); + printf("%s\n", oid_to_hex(anonymize ? + anonymize_oid(&obj->oid) : + &obj->oid)); i++; } @@ -727,7 +815,7 @@ static void handle_tag(const char *name, struct tag *tag) "\n-----BEGIN PGP SIGNATURE-----\n"); if (signature) switch(signed_tag_mode) { - case ABORT: + case SIGNED_TAG_ABORT: die("encountered signed tag %s; use " "--signed-tags=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -752,7 +840,7 @@ static void handle_tag(const char *name, struct tag *tag) tagged_mark = get_object_mark(tagged); if (!tagged_mark) { switch(tag_of_filtered_mode) { - case ABORT: + case TAG_FILTERING_ABORT: die("tag %s tags unexported object; use " "--tag-of-filtered-object=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -761,32 +849,42 @@ static void handle_tag(const char *name, struct tag *tag) free(buf); return; case REWRITE: - if (tagged->type != OBJ_COMMIT) { - die("tag %s tags unexported %s!", - oid_to_hex(&tag->object.oid), - type_name(tagged->type)); + if (tagged->type == OBJ_TAG && !mark_tags) { + die(_("Error: Cannot export nested tags unless --mark-tags is specified.")); + } else if (tagged->type == OBJ_COMMIT) { + p = rewrite_commit((struct commit *)tagged); + if (!p) { + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + free(buf); + return; + } + tagged_mark = get_object_mark(&p->object); + } else { + /* tagged->type is either OBJ_BLOB or OBJ_TAG */ + tagged_mark = get_object_mark(tagged); } - p = (struct commit *)tagged; - for (;;) { - if (p->parents && p->parents->next) - break; - if (p->object.flags & UNINTERESTING) - break; - if (!(p->object.flags & TREESAME)) - break; - if (!p->parents) - die("can't find replacement commit for tag %s", - oid_to_hex(&tag->object.oid)); - p = p->parents->item; - } - tagged_mark = get_object_mark(&p->object); } } - if (starts_with(name, "refs/tags/")) - name += 10; - printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n", - name, tagged_mark, + if (tagged->type == OBJ_TAG) { + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + } + skip_prefix(name, "refs/tags/", &name); + printf("tag %s\n", name); + if (mark_tags) { + mark_next_object(&tag->object); + printf("mark :%"PRIu32"\n", last_idnum); + } + if (tagged_mark) + printf("from :%d\n", tagged_mark); + else + printf("from %s\n", oid_to_hex(&tagged->oid)); + + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&tag->object.oid)); + printf("%.*s%sdata %d\n%.*s\n", (int)(tagger_end - tagger), tagger, tagger == tagger_end ? "" : "\n", (int)message_size, (int)message_size, message ? message : ""); @@ -804,7 +902,7 @@ static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name) /* handle nested tags */ while (tag && tag->object.type == OBJ_TAG) { parse_object(the_repository, &tag->object.oid); - string_list_append(&extra_refs, full_name)->util = tag; + string_list_append(&tag_refs, full_name)->util = tag; tag = (struct tag *)tag->tagged; } if (!tag) @@ -863,25 +961,30 @@ static void get_tags_and_duplicates(struct rev_cmdline_info *info) } /* - * This ref will not be updated through a commit, lets make - * sure it gets properly updated eventually. + * Make sure this ref gets properly updated eventually, whether + * through a commit or manually at the end. */ - if (*revision_sources_at(&revision_sources, commit) || - commit->object.flags & SHOWN) + if (e->item->type != OBJ_TAG) string_list_append(&extra_refs, full_name)->util = commit; + if (!*revision_sources_at(&revision_sources, commit)) *revision_sources_at(&revision_sources, commit) = full_name; } + + string_list_sort(&extra_refs); + string_list_remove_duplicates(&extra_refs, 0); } -static void handle_tags_and_duplicates(void) +static void handle_tags_and_duplicates(struct string_list *extras) { struct commit *commit; int i; - for (i = extra_refs.nr - 1; i >= 0; i--) { - const char *name = extra_refs.items[i].string; - struct object *object = extra_refs.items[i].util; + for (i = extras->nr - 1; i >= 0; i--) { + const char *name = extras->items[i].string; + struct object *object = extras->items[i].util; + int mark; + switch (object->type) { case OBJ_TAG: handle_tag(name, (struct tag *)object); @@ -890,9 +993,45 @@ static void handle_tags_and_duplicates(void) if (anonymize) name = anonymize_refname(name); /* create refs pointing to already seen commits */ - commit = (struct commit *)object; - printf("reset %s\nfrom :%d\n\n", name, - get_object_mark(&commit->object)); + commit = rewrite_commit((struct commit *)object); + if (!commit) { + /* + * Neither this object nor any of its + * ancestors touch any relevant paths, so + * it has been filtered to nothing. Delete + * it. + */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + + mark = get_object_mark(&commit->object); + if (!mark) { + /* + * Getting here means we have a commit which + * was excluded by a negative refspec (e.g. + * fast-export ^master master). If we are + * referencing excluded commits, set the ref + * to the exact commit. Otherwise, the user + * wants the branch exported but every commit + * in its history to be deleted, which basically + * just means deletion of the ref. + */ + if (!reference_excluded_commits) { + /* delete the ref */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + /* set ref to commit using oid, not mark */ + printf("reset %s\nfrom %s\n\n", name, + oid_to_hex(&commit->object.oid)); + continue; + } + + printf("reset %s\nfrom :%d\n\n", name, mark + ); show_progress(); break; } @@ -929,11 +1068,16 @@ static void export_marks(char *file) error("Unable to write marks file %s.", file); } -static void import_marks(char *input_file) +static void import_marks(char *input_file, int check_exists) { char line[512]; - FILE *f = xfopen(input_file, "r"); + FILE *f; + struct stat sb; + if (check_exists && stat(input_file, &sb)) + return; + + f = xfopen(input_file, "r"); while (fgets(line, sizeof(line), f)) { uint32_t mark; char *line_end, *mark_end; @@ -988,7 +1132,7 @@ static void handle_deletes(void) continue; printf("reset %s\nfrom %s\n\n", - refspec->dst, sha1_to_hex(null_sha1)); + refspec->dst, oid_to_hex(&null_oid)); } } @@ -997,7 +1141,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) struct rev_info revs; struct object_array commits = OBJECT_ARRAY_INIT; struct commit *commit; - char *export_filename = NULL, *import_filename = NULL; + char *export_filename = NULL, + *import_filename = NULL, + *import_filename_if_exists = NULL; uint32_t lastimportid; struct string_list refspecs_list = STRING_LIST_INIT_NODUP; struct string_list paths_of_changed_objects = STRING_LIST_INIT_DUP; @@ -1010,10 +1156,17 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"), N_("select handling of tags that tag filtered objects"), parse_opt_tag_of_filtered_mode), + OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"), + N_("select handling of commit messages in an alternate encoding"), + parse_opt_reencode_mode), OPT_STRING(0, "export-marks", &export_filename, N_("file"), N_("Dump marks to this file")), OPT_STRING(0, "import-marks", &import_filename, N_("file"), N_("Import marks from this file")), + OPT_STRING(0, "import-marks-if-exists", + &import_filename_if_exists, + N_("file"), + N_("Import marks from this file if it exists")), OPT_BOOL(0, "fake-missing-tagger", &fake_missing_tagger, N_("Fake a tagger when tags lack one")), OPT_BOOL(0, "full-tree", &full_tree, @@ -1024,6 +1177,13 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"), N_("Apply refspec to exported refs")), OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")), + OPT_BOOL(0, "reference-excluded-parents", + &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")), + OPT_BOOL(0, "show-original-ids", &show_original_ids, + N_("Show original object ids of blobs/commits")), + OPT_BOOL(0, "mark-tags", &mark_tags, + N_("Label tags with mark ids")), + OPT_END() }; @@ -1033,7 +1193,7 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) /* we handle encodings */ git_config(git_default_config, NULL); - init_revisions(&revs, prefix); + repo_init_revisions(the_repository, &revs, prefix); init_revision_sources(&revision_sources); revs.topo_order = 1; revs.sources = &revision_sources; @@ -1056,8 +1216,12 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) if (use_done_feature) printf("feature done\n"); + if (import_filename && import_filename_if_exists) + die(_("Cannot pass both --import-marks and --import-marks-if-exists")); if (import_filename) - import_marks(import_filename); + import_marks(import_filename, 0); + else if (import_filename_if_exists) + import_marks(import_filename_if_exists, 1); lastimportid = last_idnum; if (import_filename && revs.prune_data.nr) @@ -1080,7 +1244,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) } } - handle_tags_and_duplicates(); + handle_tags_and_duplicates(&extra_refs); + handle_tags_and_duplicates(&tag_refs); handle_deletes(); if (export_filename && lastimportid != last_idnum) |