diff options
author | Junio C Hamano <gitster@pobox.com> | 2019-01-04 13:33:33 -0800 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2019-01-04 13:33:33 -0800 |
commit | 4d59753227d6f86dec2b108704bc04e727c5347f (patch) | |
tree | f089f52a13268fe0cb53ef450f535bd5d4a99a70 /builtin/fast-export.c | |
parent | Merge branch 'nd/the-index' (diff) | |
parent | fast-export: add a --show-original-ids option to show original names (diff) | |
download | tgif-4d59753227d6f86dec2b108704bc04e727c5347f.tar.xz |
Merge branch 'en/fast-export-import'
Small fixes and features for fast-export and fast-import, mostly on
the fast-export side.
* en/fast-export-import:
  fast-export: add a --show-original-ids option to show original names
  fast-import: remove unmaintained duplicate documentation
  fast-export: add --reference-excluded-parents option
  fast-export: ensure we export requested refs
  fast-export: when using paths, avoid corrupt stream with non-existent mark
  fast-export: move commit rewriting logic into a function for reuse
  fast-export: avoid dying when filtering by paths and old tags exist
  fast-export: use value from correct enum
  git-fast-export.txt: clarify misleading documentation about rev-list args
  git-fast-import.txt: fix documentation for --quiet option
  fast-export: convert sha1 to oid
Diffstat (limited to 'builtin/fast-export.c')
-rw-r--r-- | builtin/fast-export.c | 190 |
1 file changed, 137 insertions, 53 deletions
diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 5790f0d554..9e283482ef 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -31,13 +31,16 @@ static const char *fast_export_usage[] = { }; static int progress; -static enum { ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = ABORT; -static enum { ERROR, DROP, REWRITE } tag_of_filtered_mode = ERROR; +static enum { SIGNED_TAG_ABORT, VERBATIM, WARN, WARN_STRIP, STRIP } signed_tag_mode = SIGNED_TAG_ABORT; +static enum { TAG_FILTERING_ABORT, DROP, REWRITE } tag_of_filtered_mode = TAG_FILTERING_ABORT; static int fake_missing_tagger; static int use_done_feature; static int no_data; static int full_tree; +static int reference_excluded_commits; +static int show_original_ids; static struct string_list extra_refs = STRING_LIST_INIT_NODUP; +static struct string_list tag_refs = STRING_LIST_INIT_NODUP; static struct refspec refspecs = REFSPEC_INIT_FETCH; static int anonymize; static struct revision_sources revision_sources; @@ -46,7 +49,7 @@ static int parse_opt_signed_tag_mode(const struct option *opt, const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - signed_tag_mode = ABORT; + signed_tag_mode = SIGNED_TAG_ABORT; else if (!strcmp(arg, "verbatim") || !strcmp(arg, "ignore")) signed_tag_mode = VERBATIM; else if (!strcmp(arg, "warn")) @@ -64,7 +67,7 @@ static int parse_opt_tag_of_filtered_mode(const struct option *opt, const char *arg, int unset) { if (unset || !strcmp(arg, "abort")) - tag_of_filtered_mode = ERROR; + tag_of_filtered_mode = TAG_FILTERING_ABORT; else if (!strcmp(arg, "drop")) tag_of_filtered_mode = DROP; else if (!strcmp(arg, "rewrite")) @@ -187,6 +190,22 @@ static int get_object_mark(struct object *object) return ptr_to_mark(decoration); } +static struct commit *rewrite_commit(struct commit *p) +{ + for (;;) { + if (p->parents && p->parents->next) + break; + if (p->object.flags & UNINTERESTING) + break; + if (!(p->object.flags & TREESAME)) + break; + if 
(!p->parents) + return NULL; + p = p->parents->item; + } + return p; +} + static void show_progress(void) { static int counter = 0; @@ -243,7 +262,7 @@ static void export_blob(const struct object_id *oid) if (!buf) die("could not read blob %s", oid_to_hex(oid)); if (check_object_signature(oid, buf, size, type_name(type)) < 0) - die("sha1 mismatch in blob %s", oid_to_hex(oid)); + die("oid mismatch in blob %s", oid_to_hex(oid)); object = parse_object_buffer(the_repository, oid, type, size, buf, &eaten); } @@ -253,7 +272,10 @@ static void export_blob(const struct object_id *oid) mark_next_object(object); - printf("blob\nmark :%"PRIu32"\ndata %"PRIuMAX"\n", last_idnum, (uintmax_t)size); + printf("blob\nmark :%"PRIu32"\n", last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(oid)); + printf("data %"PRIuMAX"\n", (uintmax_t)size); if (size && fwrite(buf, size, 1, stdout) != 1) die_errno("could not write blob '%s'", oid_to_hex(oid)); printf("\n"); @@ -330,17 +352,18 @@ static void print_path(const char *path) static void *generate_fake_oid(const void *old, size_t *len) { - static uint32_t counter = 1; /* avoid null sha1 */ - unsigned char *out = xcalloc(GIT_SHA1_RAWSZ, 1); - put_be32(out + GIT_SHA1_RAWSZ - 4, counter++); + static uint32_t counter = 1; /* avoid null oid */ + const unsigned hashsz = the_hash_algo->rawsz; + unsigned char *out = xcalloc(hashsz, 1); + put_be32(out + hashsz - 4, counter++); return out; } -static const unsigned char *anonymize_sha1(const struct object_id *oid) +static const struct object_id *anonymize_oid(const struct object_id *oid) { - static struct hashmap sha1s; - size_t len = GIT_SHA1_RAWSZ; - return anonymize_mem(&sha1s, generate_fake_oid, oid, &len); + static struct hashmap objs; + size_t len = the_hash_algo->rawsz; + return anonymize_mem(&objs, generate_fake_oid, oid, &len); } static void show_filemodify(struct diff_queue_struct *q, @@ -399,9 +422,9 @@ static void show_filemodify(struct diff_queue_struct *q, */ 
if (no_data || S_ISGITLINK(spec->mode)) printf("M %06o %s ", spec->mode, - sha1_to_hex(anonymize ? - anonymize_sha1(&spec->oid) : - spec->oid.hash)); + oid_to_hex(anonymize ? + anonymize_oid(&spec->oid) : + &spec->oid)); else { struct object *object = lookup_object(the_repository, spec->oid.hash); @@ -579,7 +602,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, message += 2; if (commit->parents && - get_object_mark(&commit->parents->item->object) != 0 && + (get_object_mark(&commit->parents->item->object) != 0 || + reference_excluded_commits) && !full_tree) { parse_commit_or_die(commit->parents->item); diff_tree_oid(get_commit_tree_oid(commit->parents->item), @@ -595,6 +619,13 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, export_blob(&diff_queued_diff.queue[i]->two->oid); refname = *revision_sources_at(&revision_sources, commit); + /* + * FIXME: string_list_remove() below for each ref is overall + * O(N^2). Compared to a history walk and diffing trees, this is + * just lost in the noise in practice. However, theoretically a + * repo may have enough refs for this to become slow. 
+ */ + string_list_remove(&extra_refs, refname, 0); if (anonymize) { refname = anonymize_refname(refname); anonymize_ident_line(&committer, &committer_end); @@ -608,8 +639,10 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, reencoded = reencode_string(message, "UTF-8", encoding); if (!commit->parents) printf("reset %s\n", refname); - printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s", - refname, last_idnum, + printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&commit->object.oid)); + printf("%.*s\n%.*s\ndata %u\n%s", (int)(author_end - author), author, (int)(committer_end - committer), committer, (unsigned)(reencoded @@ -620,13 +653,21 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, unuse_commit_buffer(commit, commit_buffer); for (i = 0, p = commit->parents; p; p = p->next) { - int mark = get_object_mark(&p->item->object); - if (!mark) + struct object *obj = &p->item->object; + int mark = get_object_mark(obj); + + if (!mark && !reference_excluded_commits) continue; if (i == 0) - printf("from :%d\n", mark); + printf("from "); + else + printf("merge "); + if (mark) + printf(":%d\n", mark); else - printf("merge :%d\n", mark); + printf("%s\n", oid_to_hex(anonymize ? 
+ anonymize_oid(&obj->oid) : + &obj->oid)); i++; } @@ -727,7 +768,7 @@ static void handle_tag(const char *name, struct tag *tag) "\n-----BEGIN PGP SIGNATURE-----\n"); if (signature) switch(signed_tag_mode) { - case ABORT: + case SIGNED_TAG_ABORT: die("encountered signed tag %s; use " "--signed-tags=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -752,7 +793,7 @@ static void handle_tag(const char *name, struct tag *tag) tagged_mark = get_object_mark(tagged); if (!tagged_mark) { switch(tag_of_filtered_mode) { - case ABORT: + case TAG_FILTERING_ABORT: die("tag %s tags unexported object; use " "--tag-of-filtered-object=<mode> to handle it", oid_to_hex(&tag->object.oid)); @@ -766,18 +807,12 @@ static void handle_tag(const char *name, struct tag *tag) oid_to_hex(&tag->object.oid), type_name(tagged->type)); } - p = (struct commit *)tagged; - for (;;) { - if (p->parents && p->parents->next) - break; - if (p->object.flags & UNINTERESTING) - break; - if (!(p->object.flags & TREESAME)) - break; - if (!p->parents) - die("can't find replacement commit for tag %s", - oid_to_hex(&tag->object.oid)); - p = p->parents->item; + p = rewrite_commit((struct commit *)tagged); + if (!p) { + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + free(buf); + return; } tagged_mark = get_object_mark(&p->object); } @@ -785,8 +820,10 @@ static void handle_tag(const char *name, struct tag *tag) if (starts_with(name, "refs/tags/")) name += 10; - printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n", - name, tagged_mark, + printf("tag %s\nfrom :%d\n", name, tagged_mark); + if (show_original_ids) + printf("original-oid %s\n", oid_to_hex(&tag->object.oid)); + printf("%.*s%sdata %d\n%.*s\n", (int)(tagger_end - tagger), tagger, tagger == tagger_end ? "" : "\n", (int)message_size, (int)message_size, message ? 
message : ""); @@ -804,7 +841,7 @@ static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name) /* handle nested tags */ while (tag && tag->object.type == OBJ_TAG) { parse_object(the_repository, &tag->object.oid); - string_list_append(&extra_refs, full_name)->util = tag; + string_list_append(&tag_refs, full_name)->util = tag; tag = (struct tag *)tag->tagged; } if (!tag) @@ -863,25 +900,30 @@ static void get_tags_and_duplicates(struct rev_cmdline_info *info) } /* - * This ref will not be updated through a commit, lets make - * sure it gets properly updated eventually. + * Make sure this ref gets properly updated eventually, whether + * through a commit or manually at the end. */ - if (*revision_sources_at(&revision_sources, commit) || - commit->object.flags & SHOWN) + if (e->item->type != OBJ_TAG) string_list_append(&extra_refs, full_name)->util = commit; + if (!*revision_sources_at(&revision_sources, commit)) *revision_sources_at(&revision_sources, commit) = full_name; } + + string_list_sort(&extra_refs); + string_list_remove_duplicates(&extra_refs, 0); } -static void handle_tags_and_duplicates(void) +static void handle_tags_and_duplicates(struct string_list *extras) { struct commit *commit; int i; - for (i = extra_refs.nr - 1; i >= 0; i--) { - const char *name = extra_refs.items[i].string; - struct object *object = extra_refs.items[i].util; + for (i = extras->nr - 1; i >= 0; i--) { + const char *name = extras->items[i].string; + struct object *object = extras->items[i].util; + int mark; + switch (object->type) { case OBJ_TAG: handle_tag(name, (struct tag *)object); @@ -890,9 +932,45 @@ static void handle_tags_and_duplicates(void) if (anonymize) name = anonymize_refname(name); /* create refs pointing to already seen commits */ - commit = (struct commit *)object; - printf("reset %s\nfrom :%d\n\n", name, - get_object_mark(&commit->object)); + commit = rewrite_commit((struct commit *)object); + if (!commit) { + /* + * Neither this object nor any of 
its + * ancestors touch any relevant paths, so + * it has been filtered to nothing. Delete + * it. + */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + + mark = get_object_mark(&commit->object); + if (!mark) { + /* + * Getting here means we have a commit which + * was excluded by a negative refspec (e.g. + * fast-export ^master master). If we are + * referencing excluded commits, set the ref + * to the exact commit. Otherwise, the user + * wants the branch exported but every commit + * in its history to be deleted, which basically + * just means deletion of the ref. + */ + if (!reference_excluded_commits) { + /* delete the ref */ + printf("reset %s\nfrom %s\n\n", + name, oid_to_hex(&null_oid)); + continue; + } + /* set ref to commit using oid, not mark */ + printf("reset %s\nfrom %s\n\n", name, + oid_to_hex(&commit->object.oid)); + continue; + } + + printf("reset %s\nfrom :%d\n\n", name, mark + ); show_progress(); break; } @@ -988,7 +1066,7 @@ static void handle_deletes(void) continue; printf("reset %s\nfrom %s\n\n", - refspec->dst, sha1_to_hex(null_sha1)); + refspec->dst, oid_to_hex(&null_oid)); } } @@ -1024,6 +1102,11 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) OPT_STRING_LIST(0, "refspec", &refspecs_list, N_("refspec"), N_("Apply refspec to exported refs")), OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")), + OPT_BOOL(0, "reference-excluded-parents", + &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by object id")), + OPT_BOOL(0, "show-original-ids", &show_original_ids, + N_("Show original object ids of blobs/commits")), + OPT_END() }; @@ -1080,7 +1163,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix) } } - handle_tags_and_duplicates(); + handle_tags_and_duplicates(&extra_refs); + handle_tags_and_duplicates(&tag_refs); handle_deletes(); if (export_filename && lastimportid != last_idnum) |