diff options
Diffstat (limited to 'builtin/fast-export.c')
-rw-r--r-- | builtin/fast-export.c | 245 |
1 files changed, 186 insertions, 59 deletions
diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 74f3bf5c96..c22cef3b2f 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -31,13 +31,17 @@ static const char *fast_export_usage[] = { }; static int progress; -static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT; -static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR; +static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; +static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; +static enum { REENCODE_ABORT, REENCODE_YES, REENCODE_NO } reencode_mode = REENCODE_ABORT; static int fake_missing_tagger; static int use_done_feature; static int no_data; static int full_tree; +static int reference_excluded_commits; +static int show_original_ids; static struct string_list extra_refs = STRING_LIST_INIT_NODUP; +static struct string_list tag_refs = STRING_LIST_INIT_NODUP; static struct refspec refspecs = REFSPEC_INIT_FETCH; static int anonymize; static struct revision_sources revision_sources; @@ -46,7 +50,7 @@ static int parse_opt_signed_tag_mode(const struct option *opt, const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - signed_tag_mode = ABORT; + signed_tag_mode = SIGNED_TAG_ABORT; else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) signed_tag_mode = VERBATIM; else if (!strcmp(arg, "warn")) @@ -64,7 +68,7 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt, const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - tag_of_filtered_mode = ERROR; + tag_of_filtered_mode = TAG_FILTERING_ABORT; else if (!strcmp(arg, "drop")) tag_of_filtered_mode = DROP; else if (!strcmp(arg, "rewrite")) @@ -74,6 +78,31 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt, return 0; } +static int parse_opt_reencode_mode(const struct option *opt, + const char *arg, int unset) +{ + if (unset) { + reencode_mode = REENCODE_ABORT; + return 0; + } + + switch (git_parse_maybe_bool(arg)) { + case 0: + reencode_mode = REENCODE_NO; + break; + case 1: + reencode_mode = REENCODE_YES; + break; + default: + if (!strcasecmp(arg, "abort")) + reencode_mode = REENCODE_ABORT; + else + return error("Unknown reencoding mode: %s", arg); + } + + return 0; +} + static struct decoration idnums; static uint32_t last_idnum; @@ -187,6 +216,22 @@ static int get_object_mark(struct object *object) return ptr_to_mark(decoration); } +static struct commit *rewrite_commit(struct commit *p) +{ + for (;;) { + if (p->parents && p->parents->next) + break; + if (p->object.flags & UNINTERESTING) + break; + if (!(p->object.flags & TREESAME)) + break; + if (!p->parents) + return NULL; + p = p->parents->item; + } + return p; +} + static void show_progress(void) { static int counter = 0; @@ -243,7 +288,7 @@ static void export_blob(const struct object_id *oid) if (!buf) die("could not read blob %s", oid_to_hex(oid)); if (check_object_signature(oid, buf, size, type_name(type)) < 0) - die("sha1 mismatch in blob %s", oid_to_hex(oid)); + die("oid mismatch in blob %s", oid_to_hex(oid)); object = parse_object_buffer(the_repository, oid, type, size, buf, &eaten); } @@ -253,7 +298,10 @@ static void export_blob(const struct object_id *oid) mark_next_object(object); - printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size); + printf("blob\nmark :%"PRIu32"\n", last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(oid)); + printf("data %"PRIuMAX"\n", (uintmax_t)size); if (size && fwrite(buf, size, 1, stdout) != 1) die_errno("could not write blob '%s'", oid_to_hex(oid)); printf("\n"); @@ -330,17 +378,18 @@ static void print_path(const char *path) static void *generate_fake_oid(const void *old, size_t *len) { - static uint32_t counter = 1; /* avoid null sha1 */ - unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1); - put_be32(out + GIT_SHA1_RAWSZ - 4, counter++); + static uint32_t counter = 1; /* avoid null oid */ + const unsigned hashsz = the_hash_algo->rawsz; + unsigned char *out = xcalloc(hashsz, 1); + put_be32(out + hashsz - 4, counter++); return out; } -static const unsigned char *anonymize_sha1(const struct object_id *oid) +static const struct object_id *anonymize_oid(const struct object_id *oid) { - static struct hashmap sha1s; - size_t len = GIT_SHA1_RAWSZ; - return anonymize_mem(&sha1s, generate_fake_oid, oid, &len); + static struct hashmap objs; + size_t len = the_hash_algo->rawsz; + return anonymize_mem(&objs, generate_fake_oid, oid, &len); } static void show_filemodify(struct diff_queue_struct *q, @@ -399,9 +448,9 @@ static void show_filemodify(struct diff_queue_struct *q, */ if (no_data || S_ISGITLINK(spec->mode)) printf("M %06o %s ", spec->mode, - sha1_to_hex(anonymize ? - anonymize_sha1(&spec->oid) : - spec->oid.hash)); + oid_to_hex(anonymize ? + anonymize_oid(&spec->oid) : + &spec->oid)); else { struct object *object = lookup_object(the_repository, spec->oid.hash); @@ -430,7 +479,7 @@ static const char *find_encoding(const char *begin, const char *end) bol = memmem(begin, end ? end - begin : strlen(begin), needle, strlen(needle)); if (!bol) - return git_commit_encoding; + return NULL; bol += strlen(needle); eol = strchrnul(bol, '\n'); *eol = '\0'; @@ -579,7 +628,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, message += 2; if (commit->parents && - get_object_mark(&commit->parents->item->object) != 0 && + (get_object_mark(&commit->parents->item->object) != 0 || + reference_excluded_commits) && !full_tree) { parse_commit_or_die(commit->parents->item); diff_tree_oid(get_commit_tree_oid(commit->parents->item), @@ -595,6 +645,13 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, export_blob(&diff_queued_diff.queue[i]->two->oid); refname = *revision_sources_at(&revision_sources, commit); + /* + * FIXME: string_list_remove() below for each ref is overall + * O(N^2). Compared to a history walk and diffing trees, this is + * just lost in the noise in practice. However, theoretically a + * repo may have enough refs for this to become slow. + */ + string_list_remove(&extra_refs, refname, 0); if (anonymize) { refname = anonymize_refname(refname); anonymize_ident_line(&committer, &committer_end); @@ -602,16 +659,32 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, } mark_next_object(&commit->object); - if (anonymize) + if (anonymize) { reencoded = anonymize_commit_message(message); - else if (!is_encoding_utf8(encoding)) - reencoded = reencode_string(message, "UTF-8", encoding); + } else if (encoding) { + switch(reencode_mode) { + case REENCODE_YES: + reencoded = reencode_string(message, "UTF-8", encoding); + break; + case REENCODE_NO: + break; + case REENCODE_ABORT: + die("Encountered commit-specific encoding %s in commit " + "%s; use --reencode=[yes|no] to handle it", + encoding, oid_to_hex(&commit->object.oid)); + } + } if (!commit->parents) printf("reset %s\n", refname); - printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s", - refname, last_idnum, + printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&commit->object.oid)); + printf("%.*s\n%.*s\n", (int)(author_end - author), author, - (int)(committer_end - committer), committer, + (int)(committer_end - committer), committer); + if (!reencoded && encoding) + printf("encoding %s\n", encoding); + printf("data %u\n%s", (unsigned)(reencoded ? strlen(reencoded) : message ? strlen(message) : 0), @@ -620,13 +693,21 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, unuse_commit_buffer(commit, commit_buffer); for (i = 0, p = commit->parents; p; p = p->next) { - int mark = get_object_mark(&p->item->object); - if (!mark) + struct object *obj = &p->item->object; + int mark = get_object_mark(obj); + + if (!mark && !reference_excluded_commits) continue; if (i == 0) - printf("from :%d\n", mark); + printf("from "); else - printf("merge :%d\n", mark); + printf("merge "); + if (mark) + printf(":%d\n", mark); + else + printf("%s\n", oid_to_hex(anonymize ? + anonymize_oid(&obj->oid) : + &obj->oid)); i++; } @@ -727,7 +808,7 @@ static void handle_tag(const char *name, struct tag *tag) "\n-----BEGIN PGP SIGNATURE-----\n"); if (signature) switch(signed_tag_mode) { - case ABORT: + case SIGNED_TAG_ABORT: die("encountered signed tag %s; use " "--signed-tags=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -752,7 +833,7 @@ static void handle_tag(const char *name, struct tag *tag) tagged_mark = get_object_mark(tagged); if (!tagged_mark) { switch(tag_of_filtered_mode) { - case ABORT: + case TAG_FILTERING_ABORT: die("tag %s tags unexported object; use " "--tag-of-filtered-object=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -766,18 +847,12 @@ static void handle_tag(const char *name, struct tag *tag) oid_to_hex(&tag->object.oid), type_name(tagged->type)); } - p = (struct commit *)tagged; - for (;;) { - if (p->parents && p->parents->next) - break; - if (p->object.flags & UNINTERESTING) - break; - if (!(p->object.flags & TREESAME)) - break; - if (!p->parents) - die("can't find replacement commit for tag %s", - oid_to_hex(&tag->object.oid)); - p = p->parents->item; + p = rewrite_commit((struct commit *)tagged); + if (!p) { + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + free(buf); + return; } tagged_mark = get_object_mark(&p->object); } @@ -785,8 +860,10 @@ static void handle_tag(const char *name, struct tag *tag) if (starts_with(name, "refs/tags/")) name += 10; - printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n", - name, tagged_mark, + printf("tag %s\nfrom :%d\n", name, tagged_mark); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&tag->object.oid)); + printf("%.*s%sdata %d\n%.*s\n", (int)(tagger_end - tagger), tagger, tagger == tagger_end ? "" : "\n", (int)message_size, (int)message_size, message ? message : ""); @@ -804,7 +881,7 @@ static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name) /* handle nested tags */ while (tag && tag->object.type == OBJ_TAG) { parse_object(the_repository, &tag->object.oid); - string_list_append(&extra_refs, full_name)->util = tag; + string_list_append(&tag_refs, full_name)->util = tag; tag = (struct tag *)tag->tagged; } if (!tag) @@ -863,25 +940,30 @@ static void get_tags_and_duplicates(struct rev_cmdline_info *info) } /* - * This ref will not be updated through a commit, lets make - * sure it gets properly updated eventually. + * Make sure this ref gets properly updated eventually, whether + * through a commit or manually at the end. */ - if (*revision_sources_at(&revision_sources, commit) || - commit->object.flags & SHOWN) + if (e->item->type != OBJ_TAG) string_list_append(&extra_refs, full_name)->util = commit; + if (!*revision_sources_at(&revision_sources, commit)) *revision_sources_at(&revision_sources, commit) = full_name; } + + string_list_sort(&extra_refs); + string_list_remove_duplicates(&extra_refs, 0); } -static void handle_tags_and_duplicates(void) +static void handle_tags_and_duplicates(struct string_list *extras) { struct commit *commit; int i; - for (i = extra_refs.nr - 1; i >= 0; i--) { - const char *name = extra_refs.items[i].string; - struct object *object = extra_refs.items[i].util; + for (i = extras->nr - 1; i >= 0; i--) { + const char *name = extras->items[i].string; + struct object *object = extras->items[i].util; + int mark; + switch (object->type) { case OBJ_TAG: handle_tag(name, (struct tag *)object); @@ -890,9 +972,45 @@ static void handle_tags_and_duplicates(void) if (anonymize) name = anonymize_refname(name); /* create refs pointing to already seen commits */ - commit = (struct commit *)object; - printf("reset %s\nfrom :%d\n\n", name, - get_object_mark(&commit->object)); + commit = rewrite_commit((struct commit *)object); + if (!commit) { + /* + * Neither this object nor any of its + * ancestors touch any relevant paths, so + * it has been filtered to nothing. Delete + * it. + */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + + mark = get_object_mark(&commit->object); + if (!mark) { + /* + * Getting here means we have a commit which + * was excluded by a negative refspec (e.g. + * fast-export ^master master). If we are + * referencing excluded commits, set the ref + * to the exact commit. Otherwise, the user + * wants the branch exported but every commit + * in its history to be deleted, which basically + * just means deletion of the ref. + */ + if (!reference_excluded_commits) { + /* delete the ref */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + /* set ref to commit using oid, not mark */ + printf("reset %s\nfrom %s\n\n", name, + oid_to_hex(&commit->object.oid)); + continue; + } + + printf("reset %s\nfrom :%d\n\n", name, mark + ); show_progress(); break; } @@ -988,7 +1106,7 @@ static void handle_deletes(void) continue; printf("reset %s\nfrom %s\n\n", - refspec->dst, sha1_to_hex(null_sha1)); + refspec->dst, oid_to_hex(&null_oid)); } } @@ -1010,6 +1128,9 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) OPT_CALLBACK(0, "tag-of-filtered-object", &tag_of_filtered_mode, N_("mode"), N_("select handling of tags that tag filtered objects"), parse_opt_tag_of_filtered_mode), + OPT_CALLBACK(0, "reencode", &reencode_mode, N_("mode"), + N_("select handling of commit messages in an alternate encoding"), + parse_opt_reencode_mode), OPT_STRING(0, "export-marks", &export_filename, N_("file"), N_("Dump marks to this file")), OPT_STRING(0, "import-marks", &import_filename, N_("file"), @@ -1024,6 +1145,11 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"), N_("Apply refspec to exported refs")), OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")), + OPT_BOOL(0, "reference-excluded-parents", + &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")), + OPT_BOOL(0, "show-original-ids", &show_original_ids, + N_("Show original object ids of blobs/commits")), + OPT_END() }; @@ -1033,7 +1159,7 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) /* we handle encodings */ git_config(git_default_config, NULL); - init_revisions(&revs, prefix); + repo_init_revisions(the_repository, &revs, prefix); init_revision_sources(&revision_sources); revs.topo_order = 1; revs.sources = &revision_sources; @@ -1080,7 +1206,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) } } - handle_tags_and_duplicates(); + handle_tags_and_duplicates(&extra_refs); + handle_tags_and_duplicates(&tag_refs); handle_deletes(); if (export_filename && lastimportid != last_idnum) |